diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9ff575865e3..231f6ecf449 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -6,6 +6,7 @@ ARG PYTHON_PACKAGE_MANAGER=conda FROM ${BASE} as pip-base ENV DEFAULT_VIRTUAL_ENV=rapids +ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true RUN apt update -y \ && DEBIAN_FRONTEND=noninteractive apt install -y \ @@ -33,6 +34,3 @@ ENV SCCACHE_REGION="us-east-2" ENV SCCACHE_BUCKET="rapids-sccache-devs" ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs" ENV HISTFILE="/home/coder/.cache/._bash_history" - -# cugraph_pyg's setup.py needs this defined when building in a conda env -ENV CUDA_HOME="${CUDA_HOME:-/home/coder/.conda/envs/$DEFAULT_CONDA_ENV}" diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index f5886540252..fd559d7e96c 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 270bfa239ad..f2abf493e12 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,24 
+5,24 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-ucx1.15.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { "version": "11.8", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/cuda", diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json index e31428e4b0c..3ed6fa9c37b 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.5-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json 
b/.devcontainer/cuda12.5-pip/devcontainer.json index 835274999ba..7024c0f0a22 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,24 +5,24 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { "version": "12.5", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/cuda", diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml index 895ba83ee54..e0ea775aad5 100644 --- a/.github/copy-pr-bot.yaml +++ b/.github/copy-pr-bot.yaml @@ -2,3 +2,4 @@ # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ enabled: true +auto_sync_draft: false diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 2962b4438fe..357d0d9bcae 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: python-build: needs: [cpp-build] secrets: 
inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -47,7 +47,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,21 +69,18 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibcugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} script: ci/build_wheel_pylibcugraph.sh - extra-repo: rapidsai/cugraph-ops - extra-repo-sha: branch-24.12 - extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY node_type: cpu32 wheel-publish-pylibcugraph: needs: wheel-build-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -93,20 +90,17 @@ jobs: wheel-build-cugraph: needs: wheel-publish-pylibcugraph secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} script: ci/build_wheel_cugraph.sh - extra-repo: rapidsai/cugraph-ops - extra-repo-sha: branch-24.12 - extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY wheel-publish-cugraph: needs: wheel-build-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index e82342dfd94..c9e41475a1e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -10,6 +10,7 @@ concurrency: cancel-in-progress: true jobs: + # Please keep pr-builder as the top job here pr-builder: needs: - changed-files @@ -25,20 +26,30 @@ jobs: - wheel-tests-pylibcugraph - wheel-build-cugraph - wheel-tests-cugraph + - telemetry-setup - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 if: always() with: needs: ${{ toJSON(needs) }} + telemetry-setup: + runs-on: ubuntu-latest + continue-on-error: true + env: + OTEL_SERVICE_NAME: "pr-cugraph" + steps: + - name: Telemetry setup + uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main changed-files: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-24.12 + needs: telemetry-setup + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 with: files_yaml: | test_cpp: - '**' - - '!.devcontainers/**' + - '!.devcontainer/**' - '!CONTRIBUTING.md' - 
'!README.md' - '!docs/**' @@ -49,13 +60,13 @@ jobs: - '!readme_pages/**' test_notebooks: - '**' - - '!.devcontainers/**' + - '!.devcontainer/**' - '!CONTRIBUTING.md' - '!README.md' - '!docs/**' test_python: - '**' - - '!.devcontainers/**' + - '!.devcontainer/**' - '!CONTRIBUTING.md' - '!README.md' - '!docs/**' @@ -63,27 +74,29 @@ jobs: - '!notebooks/**' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 + needs: telemetry-setup + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02 with: enable_check_generated_files: false + ignored_pr_jobs: telemetry-summarize conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: pull-request node_type: cpu32 conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: pull-request enable_check_symbols: true @@ -91,20 +104,20 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: pull-request conda-python-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request conda-notebook-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks with: build_type: pull-request @@ -115,7 +128,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -125,49 +138,57 @@ jobs: wheel-build-pylibcugraph: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: ci/build_wheel_pylibcugraph.sh - extra-repo: rapidsai/cugraph-ops - extra-repo-sha: branch-24.12 - extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY node_type: cpu32 wheel-tests-pylibcugraph: needs: [wheel-build-pylibcugraph, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_pylibcugraph.sh wheel-build-cugraph: - needs: wheel-tests-pylibcugraph + needs: wheel-build-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: ci/build_wheel_cugraph.sh - extra-repo: rapidsai/cugraph-ops - extra-repo-sha: branch-24.12 - extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY wheel-tests-cugraph: needs: [wheel-build-cugraph, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_cugraph.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12 + needs: telemetry-setup + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 with: arch: '["amd64"]' cuda: '["12.5"]' node_type: cpu32 - extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY build_command: | sccache -z; build-all --verbose -j$(nproc --ignore=1) -DBUILD_CUGRAPH_MG_TESTS=ON; sccache -s; + telemetry-summarize: + runs-on: ubuntu-latest + needs: pr-builder + if: always() + continue-on-error: true + steps: + - name: Load stashed telemetry env vars + uses: rapidsai/shared-actions/telemetry-dispatch-load-base-env-vars@main + with: + load_service_name: true + - name: Telemetry summarize + uses: rapidsai/shared-actions/telemetry-dispatch-write-summary@main + with: + cert_concat: "${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 72f76904f4b..4aa698c987f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 
+ uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: (cugraph::ops|hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel) conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibcugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: script: ci/test_wheel_pylibcugraph.sh wheel-tests-cugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml new file mode 100644 index 00000000000..01dd2436beb --- /dev/null +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -0,0 +1,26 @@ +name: Trigger Breaking Change Notifications + +on: + pull_request_target: + types: + - closed + - reopened + - labeled + - unlabeled + +jobs: + trigger-notifier: + if: contains(github.event.pull_request.labels.*.name, 'breaking') + secrets: inherit + 
uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02 + with: + sender_login: ${{ github.event.sender.login }} + sender_avatar: ${{ github.event.sender.avatar_url }} + repo: ${{ github.repository }} + pr_number: ${{ github.event.pull_request.number }} + pr_title: "${{ github.event.pull_request.title }}" + pr_body: "${{ github.event.pull_request.body || '_Empty PR description_' }}" + pr_base_ref: ${{ github.event.pull_request.base.ref }} + pr_author: ${{ github.event.pull_request.user.login }} + event_action: ${{ github.event.action }} + pr_merged: ${{ github.event.pull_request.merged }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4bb037b5fda..28f83a967c7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -53,6 +53,11 @@ repos: meta[.]yaml$| setup[.]cfg$ - id: verify-alpha-spec + - repo: https://github.com/sphinx-contrib/sphinx-lint + rev: v1.0.0 + hooks: + - id: sphinx-lint + args: ["--enable=all", "--disable=line-too-long"] - repo: https://github.com/rapidsai/dependency-file-generator rev: v1.16.0 hooks: diff --git a/README.md b/README.md index e41caec17b0..820e6737590 100644 --- a/README.md +++ b/README.md @@ -34,16 +34,6 @@ ------ -## News - -___NEW!___ _[nx-cugraph](https://rapids.ai/nx-cugraph/)_, a NetworkX backend that provides GPU acceleration to NetworkX with zero code change. -``` -> pip install nx-cugraph-cu11 --extra-index-url https://pypi.nvidia.com -> export NETWORKX_AUTOMATIC_BACKENDS=cugraph -``` -That's it. NetworkX now leverages cuGraph for accelerated graph algorithms. - ----- ## Table of contents @@ -64,8 +54,6 @@ That's it. NetworkX now leverages cuGraph for accelerated graph algorithms. 
- [libcugraph (C/C++/CUDA)](./readme_pages/libcugraph.md) - [nx-cugraph](https://rapids.ai/nx-cugraph/) - [cugraph-service](./readme_pages/cugraph_service.md) - - [cugraph-dgl](./readme_pages/cugraph_dgl.md) - - [cugraph-ops](./readme_pages/cugraph_ops.md) - API Docs - Python - [Python Nightly](https://docs.rapids.ai/api/cugraph/nightly/) diff --git a/VERSION b/VERSION index af28c42b528..72eefaf7c79 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.12.00 +25.02.00 diff --git a/build.sh b/build.sh index 398582c74c0..0282948659e 100755 --- a/build.sh +++ b/build.sh @@ -29,8 +29,6 @@ VALIDARGS=" pylibcugraph cugraph cugraph-service - cugraph-pyg - cugraph-dgl cpp-mgtests cpp-mtmgtests docs @@ -56,8 +54,6 @@ HELP="$0 [ ...] [ ...] pylibcugraph - build the pylibcugraph Python package cugraph - build the cugraph Python package cugraph-service - build the cugraph-service_client and cugraph-service_server Python package - cugraph-pyg - build the cugraph-pyg Python package - cugraph-dgl - build the cugraph-dgl extensions for DGL cpp-mgtests - build libcugraph and libcugraph_etl MG tests. Builds MPI communicator, adding MPI as a dependency. cpp-mtmgtests - build libcugraph MTMG tests. Adds UCX as a dependency (temporary). 
docs - build the docs @@ -84,12 +80,10 @@ LIBCUGRAPH_ETL_BUILD_DIR=${LIBCUGRAPH_ETL_BUILD_DIR:=${REPODIR}/cpp/libcugraph_e CUGRAPH_SERVICE_BUILD_DIRS="${REPODIR}/python/cugraph-service/server/build ${REPODIR}/python/cugraph-service/client/build " -CUGRAPH_DGL_BUILD_DIR=${REPODIR}/python/cugraph-dgl/build BUILD_DIRS="${LIBCUGRAPH_BUILD_DIR} ${LIBCUGRAPH_ETL_BUILD_DIR} ${CUGRAPH_SERVICE_BUILD_DIRS} - ${CUGRAPH_DGL_BUILD_DIR} " # Set defaults for vars modified by flags to this script @@ -325,24 +319,6 @@ if hasArg cugraph-service || hasArg all; then fi fi -# Build and install the cugraph-pyg Python package -if hasArg cugraph-pyg || hasArg all; then - if hasArg --clean; then - cleanPythonDir ${REPODIR}/python/cugraph-pyg - else - python ${PYTHON_ARGS_FOR_INSTALL} ${REPODIR}/python/cugraph-pyg - fi -fi - -# Install the cugraph-dgl extensions for DGL -if hasArg cugraph-dgl || hasArg all; then - if hasArg --clean; then - cleanPythonDir ${REPODIR}/python/cugraph-dgl - else - python ${PYTHON_ARGS_FOR_INSTALL} ${REPODIR}/python/cugraph-dgl - fi -fi - # Build the docs if hasArg docs || hasArg all; then if [ ! -d ${LIBCUGRAPH_BUILD_DIR} ]; then @@ -355,17 +331,17 @@ if hasArg docs || hasArg all; then ${CMAKE_VERBOSE_OPTION} fi - for PROJECT in libcugraphops libwholegraph; do - XML_DIR="${REPODIR}/docs/cugraph/${PROJECT}" - rm -rf "${XML_DIR}" - mkdir -p "${XML_DIR}" - export XML_DIR_${PROJECT^^}="$XML_DIR" - - echo "downloading xml for ${PROJECT} into ${XML_DIR}. Environment variable XML_DIR_${PROJECT^^} is set to ${XML_DIR}" - curl -O "https://d1664dvumjb44w.cloudfront.net/${PROJECT}/xml_tar/${RAPIDS_VERSION}/xml.tar.gz" - tar -xzf xml.tar.gz -C "${XML_DIR}" - rm "./xml.tar.gz" - done + # for PROJECT in libwholegraph; do + # XML_DIR="${REPODIR}/docs/cugraph/${PROJECT}" + # rm -rf "${XML_DIR}" + # mkdir -p "${XML_DIR}" + # export XML_DIR_${PROJECT^^}="$XML_DIR" + + # echo "downloading xml for ${PROJECT} into ${XML_DIR}. 
Environment variable XML_DIR_${PROJECT^^} is set to ${XML_DIR}" + # curl -O "https://d1664dvumjb44w.cloudfront.net/${PROJECT}/xml_tar/${RAPIDS_VERSION}/xml.tar.gz" + # tar -xzf xml.tar.gz -C "${XML_DIR}" + # rm "./xml.tar.gz" + # done cd ${LIBCUGRAPH_BUILD_DIR} cmake --build "${LIBCUGRAPH_BUILD_DIR}" -j${PARALLEL_LEVEL} --target docs_cugraph ${VERBOSE_FLAG} diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 2d7e90da8d0..ad77c9a0e60 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -46,19 +46,18 @@ rapids-mamba-retry install \ "cugraph-service-server=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ "cugraph-service-client=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ "libcugraph_etl=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ - "pylibcugraphops=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ "pylibwholegraph=${RAPIDS_VERSION_MAJOR_MINOR}.*" \ 'pytorch>=2.3' \ "cuda-version=${CONDA_CUDA_VERSION}" export RAPIDS_DOCS_DIR="$(mktemp -d)" -for PROJECT in libcugraphops libwholegraph; do - rapids-logger "Download ${PROJECT} xml_tar" - TMP_DIR=$(mktemp -d) - export XML_DIR_${PROJECT^^}="$TMP_DIR" - curl "https://d1664dvumjb44w.cloudfront.net/${PROJECT}/xml_tar/${RAPIDS_VERSION_NUMBER}/xml.tar.gz" | tar -xzf - -C "${TMP_DIR}" -done +# for PROJECT in libwholegraph; do +# rapids-logger "Download ${PROJECT} xml_tar" +# TMP_DIR=$(mktemp -d) +# export XML_DIR_${PROJECT^^}="$TMP_DIR" +# curl "https://d1664dvumjb44w.cloudfront.net/${PROJECT}/xml_tar/${RAPIDS_VERSION_NUMBER}/xml.tar.gz" | tar -xzf - -C "${TMP_DIR}" +# done rapids-logger "Build CPP docs" pushd cpp/doxygen diff --git a/ci/build_wheel_cugraph.sh b/ci/build_wheel_cugraph.sh index b937fd7f6d8..11ab9acc58c 100755 --- a/ci/build_wheel_cugraph.sh +++ b/ci/build_wheel_cugraph.sh @@ -30,7 +30,7 @@ case "${RAPIDS_CUDA_VERSION}" in ;; esac -export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF;-DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/${EXTRA_CMAKE_ARGS}" +export 
SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF${EXTRA_CMAKE_ARGS}" export SKBUILD_BUILD_TOOL_ARGS="-j${PARALLEL_LEVEL};-l${PARALLEL_LEVEL}" ./ci/build_wheel.sh cugraph ${package_dir} diff --git a/ci/build_wheel_pylibcugraph.sh b/ci/build_wheel_pylibcugraph.sh index 928e12d2350..8bdfa20ee77 100755 --- a/ci/build_wheel_pylibcugraph.sh +++ b/ci/build_wheel_pylibcugraph.sh @@ -17,7 +17,7 @@ case "${RAPIDS_CUDA_VERSION}" in ;; esac -export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF;-DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/${EXTRA_CMAKE_ARGS}" +export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF${EXTRA_CMAKE_ARGS}" export SKBUILD_BUILD_TOOL_ARGS="-j${PARALLEL_LEVEL};-l${PARALLEL_LEVEL}" ./ci/build_wheel.sh pylibcugraph ${package_dir} diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index be1988e31dd..a73745f2c0e 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -51,20 +51,16 @@ NEXT_UCXX_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; p DEPENDENCIES=( cudf cugraph - cugraph-dgl - cugraph-pyg cugraph-service-server cugraph-service-client cuxfilter dask-cuda dask-cudf libcudf - libcugraphops libraft libraft-headers librmm pylibcugraph - pylibcugraphops pylibwholegraph pylibraft pyraft @@ -75,7 +71,7 @@ DEPENDENCIES=( UCXX_DEPENDENCIES=( ucx-py ) -for FILE in dependencies.yaml conda/environments/*.yaml python/cugraph-{pyg,dgl}/conda/*.yaml; do +for FILE in dependencies.yaml conda/environments/*.yaml; do for DEP in "${DEPENDENCIES[@]}"; do sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" done @@ -100,8 +96,6 @@ done # CI files for FILE in .github/workflows/*.yaml; do sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" - # Wheel builds clone cugraph-ops, update its branch - sed_runner "s/extra-repo-sha: branch-.*/extra-repo-sha: 
branch-${NEXT_SHORT_TAG}/g" "${FILE}" # Wheel builds install dask-cuda from source, update its branch sed_runner "s/dask-cuda.git@branch-[0-9][0-9].[0-9][0-9]/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" "${FILE}" done diff --git a/ci/run_cugraph_dgl_pytests.sh b/ci/run_cugraph_dgl_pytests.sh deleted file mode 100755 index 83c26a57dc0..00000000000 --- a/ci/run_cugraph_dgl_pytests.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -# Copyright (c) 2024, NVIDIA CORPORATION. - -set -euo pipefail - -# Support invoking run_cugraph_dgl_pytests.sh outside the script directory -cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cugraph-dgl/tests - -pytest --cache-clear --ignore=mg "$@" . diff --git a/ci/run_cugraph_pyg_pytests.sh b/ci/run_cugraph_pyg_pytests.sh deleted file mode 100755 index fb27f16d79e..00000000000 --- a/ci/run_cugraph_pyg_pytests.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -# Copyright (c) 2024, NVIDIA CORPORATION. - -set -euo pipefail - -# Support invoking run_cugraph_pyg_pytests.sh outside the script directory -cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cugraph-pyg/cugraph_pyg - -pytest --cache-clear --benchmark-disable "$@" . - -# Used to skip certain examples in CI due to memory limitations -export CI_RUN=1 - -# Test examples -for e in "$(pwd)"/examples/*.py; do - rapids-logger "running example $e" - (yes || true) | python $e -done diff --git a/ci/test.sh b/ci/test.sh index 884ed7ac881..8e19b6c8c18 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -99,13 +99,6 @@ if hasArg "--run-python-tests"; then pytest -sv -m sg -m "managedmem_on and poolallocator_on and tiny" --benchmark-disable echo "Ran Python benchmarks for cuGraph (running as tests) : return code was: $?, test script exit code is now: $EXITCODE" - echo "Python pytest for cugraph_pyg (single-GPU only)..." 
- conda list - cd ${CUGRAPH_ROOT}/python/cugraph-pyg/cugraph_pyg - # rmat is not tested because of MG testing - pytest -sv -m sg --cache-clear --junitxml=${CUGRAPH_ROOT}/junit-cugraph-pytests.xml -v --cov-config=.coveragerc --cov=cugraph_pyg --cov-report=xml:${WORKSPACE}/python/cugraph_pyg/cugraph-coverage.xml --cov-report term --ignore=raft --benchmark-disable - echo "Ran Python pytest for cugraph_pyg : return code was: $?, test script exit code is now: $EXITCODE" - echo "Python pytest for cugraph-service (single-GPU only)..." cd ${CUGRAPH_ROOT}/python/cugraph-service pytest -sv --cache-clear --junitxml=${CUGRAPH_ROOT}/junit-cugraph-service-pytests.xml --benchmark-disable -k "not mg" ./tests diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index ec4c23541f9..e9f6842e8b8 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -4,7 +4,6 @@ channels: - rapidsai - rapidsai-nightly - dask/label/dev -- dglteam/label/th23_cu118 - conda-forge - nvidia dependencies: @@ -15,22 +14,21 @@ dependencies: - cuda-nvtx - cuda-version=11.8 - cudatoolkit -- cudf==24.12.*,>=0.0.0a0 +- cudf==25.2.*,>=0.0.0a0 - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.12.*,>=0.0.0a0 -- dask-cudf==24.12.*,>=0.0.0a0 +- dask-cuda==25.2.*,>=0.0.0a0 +- dask-cudf==25.2.*,>=0.0.0a0 - doxygen - fsspec>=0.6.0 - gcc_linux-64=11.* - graphviz - ipython -- libcudf==24.12.*,>=0.0.0a0 -- libcugraphops==24.12.*,>=0.0.0a0 -- libraft-headers==24.12.*,>=0.0.0a0 -- libraft==24.12.*,>=0.0.0a0 -- librmm==24.12.*,>=0.0.0a0 +- libcudf==25.2.*,>=0.0.0a0 +- libraft-headers==25.2.*,>=0.0.0a0 +- libraft==25.2.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - nbsphinx - nccl>=2.19 - networkx>=2.5.1 @@ -46,21 +44,20 @@ dependencies: - pre-commit - pydantic - pydata-sphinx-theme -- pylibcugraphops==24.12.*,>=0.0.0a0 -- pylibraft==24.12.*,>=0.0.0a0 -- pylibwholegraph==24.12.*,>=0.0.0a0 +- 
pylibraft==25.2.*,>=0.0.0a0 +- pylibwholegraph==25.2.*,>=0.0.0a0 - pytest - pytest-benchmark - pytest-cov - pytest-xdist - python-louvain - pytorch>=2.3 -- raft-dask==24.12.*,>=0.0.0a0 +- raft-dask==25.2.*,>=0.0.0a0 - rapids-build-backend>=0.3.1,<0.4.0.dev0 -- rapids-dask-dependency==24.12.*,>=0.0.0a0 +- rapids-dask-dependency==25.2.*,>=0.0.0a0 - recommonmark - requests -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=0.23.1 - scipy @@ -73,7 +70,7 @@ dependencies: - torchdata - torchmetrics - ucx-proc=*=gpu -- ucx-py==0.41.*,>=0.0.0a0 +- ucx-py==0.42.*,>=0.0.0a0 - wget - wheel name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index de0507c7c22..13e102862ab 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -4,7 +4,6 @@ channels: - rapidsai - rapidsai-nightly - dask/label/dev -- dglteam/label/th23_cu118 - conda-forge - nvidia dependencies: @@ -17,26 +16,25 @@ dependencies: - cuda-nvtx-dev - cuda-profiler-api - cuda-version=12.5 -- cudf==24.12.*,>=0.0.0a0 +- cudf==25.2.*,>=0.0.0a0 - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.12.*,>=0.0.0a0 -- dask-cudf==24.12.*,>=0.0.0a0 +- dask-cuda==25.2.*,>=0.0.0a0 +- dask-cudf==25.2.*,>=0.0.0a0 - doxygen - fsspec>=0.6.0 - gcc_linux-64=11.* - graphviz - ipython - libcublas-dev -- libcudf==24.12.*,>=0.0.0a0 -- libcugraphops==24.12.*,>=0.0.0a0 +- libcudf==25.2.*,>=0.0.0a0 - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==24.12.*,>=0.0.0a0 -- libraft==24.12.*,>=0.0.0a0 -- librmm==24.12.*,>=0.0.0a0 +- libraft-headers==25.2.*,>=0.0.0a0 +- libraft==25.2.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - nbsphinx - nccl>=2.19 - networkx>=2.5.1 @@ -51,21 +49,20 @@ dependencies: - pre-commit - pydantic - pydata-sphinx-theme -- pylibcugraphops==24.12.*,>=0.0.0a0 -- pylibraft==24.12.*,>=0.0.0a0 -- 
pylibwholegraph==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 +- pylibwholegraph==25.2.*,>=0.0.0a0 - pytest - pytest-benchmark - pytest-cov - pytest-xdist - python-louvain - pytorch>=2.3 -- raft-dask==24.12.*,>=0.0.0a0 +- raft-dask==25.2.*,>=0.0.0a0 - rapids-build-backend>=0.3.1,<0.4.0.dev0 -- rapids-dask-dependency==24.12.*,>=0.0.0a0 +- rapids-dask-dependency==25.2.*,>=0.0.0a0 - recommonmark - requests -- rmm==24.12.*,>=0.0.0a0 +- rmm==25.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=0.23.1 - scipy @@ -78,7 +75,7 @@ dependencies: - torchdata - torchmetrics - ucx-proc=*=gpu -- ucx-py==0.41.*,>=0.0.0a0 +- ucx-py==0.42.*,>=0.0.0a0 - wget - wheel name: all_cuda-125_arch-x86_64 diff --git a/conda/recipes/cugraph-dgl/build.sh b/conda/recipes/cugraph-dgl/build.sh deleted file mode 100644 index 14d29b7eab9..00000000000 --- a/conda/recipes/cugraph-dgl/build.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (c) 2022, NVIDIA CORPORATION. - -# This assumes the script is executed from the root of the repo directory - -./build.sh cugraph-dgl diff --git a/conda/recipes/cugraph-dgl/meta.yaml b/conda/recipes/cugraph-dgl/meta.yaml deleted file mode 100644 index ed449b5982a..00000000000 --- a/conda/recipes/cugraph-dgl/meta.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. - -{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} -{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} -{% set py_version = environ['CONDA_PY'] %} -{% set date_string = environ['RAPIDS_DATE_STRING'] %} - -package: - name: cugraph-dgl - version: {{ version }} - -source: - path: ../../.. 
- -build: - number: {{ GIT_DESCRIBE_NUMBER }} - build: - number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} - -requirements: - host: - - python - - rapids-build-backend>=0.3.1,<0.4.0.dev0 - - setuptools>=61.0.0 - run: - - cugraph ={{ version }} - - dgl >=2.4.0.th23.cu* - - numba >=0.57 - - numpy >=1.23,<3.0a0 - - pylibcugraphops ={{ minor_version }} - - tensordict >=0.1.2 - - python - - pytorch >=2.3 - - cupy >=12.0.0 - -tests: - imports: - - cugraph_dgl - -about: - home: https://rapids.ai/ - dev_url: https://github.com/rapidsai/cugraph - license: Apache-2.0 - license_file: ../../../LICENSE - summary: cuGraph library diff --git a/conda/recipes/cugraph-pyg/build.sh b/conda/recipes/cugraph-pyg/build.sh deleted file mode 100644 index ad2502985e5..00000000000 --- a/conda/recipes/cugraph-pyg/build.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) 2022, NVIDIA CORPORATION. - -# This assumes the script is executed from the root of the repo directory -./build.sh cugraph-pyg --allgpuarch diff --git a/conda/recipes/cugraph-pyg/conda_build_config.yaml b/conda/recipes/cugraph-pyg/conda_build_config.yaml deleted file mode 100644 index 47d98b4800b..00000000000 --- a/conda/recipes/cugraph-pyg/conda_build_config.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. - -c_compiler_version: - - 11 - -cxx_compiler_version: - - 11 - -cuda_compiler: - - nvcc - -cmake_version: - - ">=3.26.4,!=3.30.0" - -c_stdlib: - - sysroot - -c_stdlib_version: - - "2.17" diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml deleted file mode 100644 index b44614baa9f..00000000000 --- a/conda/recipes/cugraph-pyg/meta.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
- -{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} -{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} -{% set py_version = environ['CONDA_PY'] %} -{% set date_string = environ['RAPIDS_DATE_STRING'] %} - -package: - name: cugraph-pyg - version: {{ version }} - -source: - path: ../../.. - -build: - number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} - script_env: - - PARALLEL_LEVEL - -requirements: - build: - - {{ stdlib("c") }} - host: - - cython >=3.0.0 - - python - - rapids-build-backend>=0.3.1,<0.4.0.dev0 - - setuptools>=61.0.0 - run: - - rapids-dask-dependency ={{ minor_version }} - - numba >=0.57 - - numpy >=1.23,<3.0a0 - - python - - pytorch >=2.3 - - cupy >=12.0.0 - - cugraph ={{ version }} - - pylibcugraphops ={{ minor_version }} - - tensordict >=0.1.2 - - pytorch_geometric >=2.5,<2.6 - -tests: - imports: - - cugraph_pyg - -about: - home: https://rapids.ai/ - dev_url: https://github.com/rapidsai/cugraph - license: Apache-2.0 - license_file: ../../../LICENSE - summary: cuGraph-pyg library diff --git a/conda/recipes/cugraph-service/conda_build_config.yaml b/conda/recipes/cugraph-service/conda_build_config.yaml index 67ed3e26b0e..7e541432be8 100644 --- a/conda/recipes/cugraph-service/conda_build_config.yaml +++ b/conda/recipes/cugraph-service/conda_build_config.yaml @@ -1,2 +1,2 @@ ucx_py_version: - - "0.41.*" + - "0.42.*" diff --git a/conda/recipes/cugraph/conda_build_config.yaml b/conda/recipes/cugraph/conda_build_config.yaml index 10f2e15c550..6f1201c83a4 100644 --- a/conda/recipes/cugraph/conda_build_config.yaml +++ b/conda/recipes/cugraph/conda_build_config.yaml @@ -20,4 +20,4 @@ c_stdlib_version: - "2.17" ucx_py_version: - - "0.41.*" + - "0.42.*" diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 5a13ba30fd0..094b30c5fce 100644 --- 
a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -75,10 +75,10 @@ requirements: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} - cudatoolkit - - cuda-python >=11.7.1,<12.0a0,<=11.8.3 + - cuda-python >=11.8.5,<12.0a0 {% else %} - cuda-cudart - - cuda-python >=12.0,<13.0a0,<=12.6.0 + - cuda-python >=12.6.2,<13.0a0 {% endif %} - cudf ={{ minor_version }} - cupy >=12.0.0 diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 8b1fadbba25..922d5e577b2 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -69,7 +69,6 @@ requirements: - cuda-version ={{ cuda_version }} - doxygen {{ doxygen_version }} - libcudf ={{ minor_version }} - - libcugraphops ={{ minor_version }} - libraft ={{ minor_version }} - libraft-headers ={{ minor_version }} - librmm ={{ minor_version }} @@ -114,7 +113,6 @@ outputs: - libcusolver - libcusparse {% endif %} - - libcugraphops ={{ minor_version }} - libraft ={{ minor_version }} - libraft-headers ={{ minor_version }} - librmm ={{ minor_version }} diff --git a/conda/recipes/pylibcugraph/conda_build_config.yaml b/conda/recipes/pylibcugraph/conda_build_config.yaml index 10f2e15c550..6f1201c83a4 100644 --- a/conda/recipes/pylibcugraph/conda_build_config.yaml +++ b/conda/recipes/pylibcugraph/conda_build_config.yaml @@ -20,4 +20,4 @@ c_stdlib_version: - "2.17" ucx_py_version: - - "0.41.*" + - "0.42.*" diff --git a/cpp/.clang-format b/cpp/.clang-format index 3095c6d514a..16abf7475fa 100644 --- a/cpp/.clang-format +++ b/cpp/.clang-format @@ -77,7 +77,7 @@ IncludeCategories: Priority: 1 - Regex: '^ - $ PRIVATE ${COMPILED_RAFT_LIB} cuco::cuco + rmm::rmm_logger_impl ) ################################################################################ @@ -538,6 +538,8 @@ add_library(cugraph_c src/c_api/weakly_connected_components.cpp src/c_api/strongly_connected_components.cpp src/c_api/allgather.cpp + 
src/c_api/decompress_to_edgelist.cpp + src/c_api/edgelist.cpp ) add_library(cugraph::cugraph_c ALIAS cugraph_c) diff --git a/cpp/include/cugraph/edge_partition_device_view.cuh b/cpp/include/cugraph/edge_partition_device_view.cuh index 583b0a37214..628c3cc10cc 100644 --- a/cpp/include/cugraph/edge_partition_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_device_view.cuh @@ -204,6 +204,7 @@ class edge_partition_device_view_t view) : detail::edge_partition_device_view_base_t(view.offsets(), view.indices()), dcs_nzd_vertices_(detail::to_thrust_optional(view.dcs_nzd_vertices())), + dcs_nzd_range_bitmap_(detail::to_thrust_optional(view.dcs_nzd_range_bitmap())), major_hypersparse_first_(detail::to_thrust_optional(view.major_hypersparse_first())), major_range_first_(view.major_range_first()), major_range_last_(view.major_range_last()), @@ -218,6 +219,7 @@ class edge_partition_device_view_t()); } + template + __host__ void compute_number_of_edges_async(MajorIterator major_first, + MajorIterator major_last, + raft::device_span count /* size = 1 */, + rmm::cuda_stream_view stream) const + { + if (thrust::distance(major_first, major_last) == 0) { + RAFT_CUDA_TRY(cudaMemsetAsync(count.data(), 0, sizeof(size_t), stream)); + } + + rmm::device_uvector d_tmp_storage(0, stream); + size_t tmp_storage_bytes{0}; + + if (dcs_nzd_vertices_) { + auto local_degree_first = thrust::make_transform_iterator( + major_first, + detail::local_degree_op_t{ + this->offsets_, major_range_first_, *dcs_nzd_vertices_, *major_hypersparse_first_}); + cub::DeviceReduce::Sum(static_cast(nullptr), + tmp_storage_bytes, + local_degree_first, + count.data(), + thrust::distance(major_first, major_last), + stream); + d_tmp_storage.resize(tmp_storage_bytes, stream); + cub::DeviceReduce::Sum(d_tmp_storage.data(), + tmp_storage_bytes, + local_degree_first, + count.data(), + thrust::distance(major_first, major_last), + stream); + } else { + auto local_degree_first = thrust::make_transform_iterator( + 
major_first, + detail::local_degree_op_t{ + this->offsets_, major_range_first_, std::byte{0} /* dummy */, std::byte{0} /* dummy */}); + cub::DeviceReduce::Sum(static_cast(nullptr), + tmp_storage_bytes, + local_degree_first, + count.data(), + thrust::distance(major_first, major_last), + stream); + d_tmp_storage.resize(tmp_storage_bytes, stream); + cub::DeviceReduce::Sum(d_tmp_storage.data(), + tmp_storage_bytes, + local_degree_first, + count.data(), + thrust::distance(major_first, major_last), + stream); + } + } + __host__ rmm::device_uvector compute_local_degrees(rmm::cuda_stream_view stream) const { rmm::device_uvector local_degrees(this->major_range_size(), stream); if (dcs_nzd_vertices_) { assert(major_hypersparse_first_); - thrust::transform(rmm::exec_policy(stream), + thrust::transform(rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(this->major_range_first()), thrust::make_counting_iterator(this->major_range_last()), local_degrees.begin(), @@ -266,7 +328,7 @@ class edge_partition_device_view_tmajor_range_first()), thrust::make_counting_iterator(this->major_range_last()), local_degrees.begin(), @@ -284,7 +346,7 @@ class edge_partition_device_view_t local_degrees(thrust::distance(major_first, major_last), stream); if (dcs_nzd_vertices_) { assert(major_hypersparse_first_); - thrust::transform(rmm::exec_policy(stream), + thrust::transform(rmm::exec_policy_nosync(stream), major_first, major_last, local_degrees.begin(), @@ -295,7 +357,7 @@ class edge_partition_device_view_tmajor_range_first()), thrust::make_counting_iterator(this->major_range_last()), local_degrees.begin(), @@ -368,7 +431,7 @@ class edge_partition_device_view_tmajor_range_first()), thrust::make_counting_iterator(this->major_range_last()), local_degrees.begin(), @@ -394,7 +457,7 @@ class edge_partition_device_view_t> for consistency (see + // dcs_nzd_range_bitmap()) __host__ __device__ thrust::optional dcs_nzd_vertices() const { return dcs_nzd_vertices_ ? 
thrust::optional{(*dcs_nzd_vertices_).data()} @@ -528,10 +593,20 @@ class edge_partition_device_view_t> dcs_nzd_range_bitmap() + const + { + return dcs_nzd_range_bitmap_ + ? thrust::make_optional>( + (*dcs_nzd_range_bitmap_).data(), (*dcs_nzd_range_bitmap_).size()) + : thrust::nullopt; + } + private: // should be trivially copyable to device thrust::optional> dcs_nzd_vertices_{thrust::nullopt}; + thrust::optional> dcs_nzd_range_bitmap_{thrust::nullopt}; thrust::optional major_hypersparse_first_{thrust::nullopt}; vertex_t major_range_first_{0}; @@ -558,6 +633,7 @@ class edge_partition_device_view_t()); } + template + __host__ void compute_number_of_edges_async(MajorIterator major_first, + MajorIterator major_last, + raft::device_span count /* size = 1 */, + rmm::cuda_stream_view stream) const + { + if (thrust::distance(major_first, major_last) == 0) { + RAFT_CUDA_TRY(cudaMemsetAsync(count.data(), 0, sizeof(size_t), stream)); + } + + rmm::device_uvector d_tmp_storage(0, stream); + size_t tmp_storage_bytes{0}; + + auto local_degree_first = thrust::make_transform_iterator( + major_first, + detail::local_degree_op_t{this->offsets_, + std::byte{0} /* dummy */, + std::byte{0} /* dummy */, + std::byte{0} /* dummy */}); + cub::DeviceReduce::Sum(static_cast(nullptr), + tmp_storage_bytes, + local_degree_first, + count.data(), + thrust::distance(major_first, major_last), + stream); + d_tmp_storage.resize(tmp_storage_bytes, stream); + cub::DeviceReduce::Sum(d_tmp_storage.data(), + tmp_storage_bytes, + local_degree_first, + count.data(), + thrust::distance(major_first, major_last), + stream); + } + __host__ rmm::device_uvector compute_local_degrees(rmm::cuda_stream_view stream) const { rmm::device_uvector local_degrees(this->major_range_size(), stream); - thrust::transform(rmm::exec_policy(stream), + thrust::transform(rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(this->major_range_first()), thrust::make_counting_iterator(this->major_range_last()), 
local_degrees.begin(), @@ -595,7 +709,7 @@ class edge_partition_device_view_t local_degrees(thrust::distance(major_first, major_last), stream); - thrust::transform(rmm::exec_policy(stream), + thrust::transform(rmm::exec_policy_nosync(stream), major_first, major_last, local_degrees.begin(), @@ -613,6 +727,7 @@ class edge_partition_device_view_t local_degrees(this->major_range_size(), stream); thrust::transform( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(this->major_range_first()), thrust::make_counting_iterator(this->major_range_last()), local_degrees.begin(), @@ -660,7 +775,7 @@ class edge_partition_device_view_t local_degrees(thrust::distance(major_first, major_last), stream); thrust::transform( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), major_first, major_last, local_degrees.begin(), diff --git a/cpp/include/cugraph/edge_partition_view.hpp b/cpp/include/cugraph/edge_partition_view.hpp index 42465273718..27c5705dfcc 100644 --- a/cpp/include/cugraph/edge_partition_view.hpp +++ b/cpp/include/cugraph/edge_partition_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -56,6 +56,7 @@ class edge_partition_view_t offsets, raft::device_span indices, std::optional> dcs_nzd_vertices, + std::optional> dcs_nzd_range_bitmap, std::optional major_hypersparse_first, vertex_t major_range_first, vertex_t major_range_last, @@ -64,6 +65,7 @@ class edge_partition_view_t(offsets, indices), dcs_nzd_vertices_(dcs_nzd_vertices), + dcs_nzd_range_bitmap_(dcs_nzd_range_bitmap), major_hypersparse_first_(major_hypersparse_first), major_range_first_(major_range_first), major_range_last_(major_range_last), @@ -78,6 +80,11 @@ class edge_partition_view_t> dcs_nzd_range_bitmap() const + { + return dcs_nzd_range_bitmap_; + } + std::optional major_hypersparse_first() const { return major_hypersparse_first_; } vertex_t major_range_first() const { return major_range_first_; } @@ -90,6 +97,7 @@ class edge_partition_view_t> dcs_nzd_vertices_{std::nullopt}; + std::optional> dcs_nzd_range_bitmap_{std::nullopt}; std::optional major_hypersparse_first_{std::nullopt}; vertex_t major_range_first_{0}; diff --git a/cpp/include/cugraph/graph.hpp b/cpp/include/cugraph/graph.hpp index 0607b39153d..290f4b3c4db 100644 --- a/cpp/include/cugraph/graph.hpp +++ b/cpp/include/cugraph/graph.hpp @@ -48,6 +48,7 @@ struct graph_meta_t> { partition_t partition{}; std::vector edge_partition_segment_offsets{}; + std::optional> edge_partition_hypersparse_degree_offsets{}; vertex_t num_local_unique_edge_srcs{}; vertex_t num_local_unique_edge_dsts{}; @@ -61,6 +62,7 @@ struct graph_meta_t> { // segment offsets based on vertex degree, relevant only if vertex IDs are renumbered std::optional> segment_offsets{std::nullopt}; + std::optional> hypersparse_degree_offsets{std::nullopt}; }; // graph_t is an owning graph class (note that graph_view_t is a non-owning graph class) @@ -101,6 +103,11 @@ class graph_t>>( (*edge_partition_dcs_nzd_vertices_).size()) : std::nullopt; + auto dcs_nzd_range_bitmaps = + edge_partition_dcs_nzd_range_bitmaps_ + ? 
std::make_optional>>( + (*edge_partition_dcs_nzd_range_bitmaps_).size()) + : std::nullopt; for (size_t i = 0; i < offsets.size(); ++i) { offsets[i] = raft::device_span(edge_partition_offsets_[i].data(), edge_partition_offsets_[i].size()); @@ -111,6 +118,11 @@ class graph_t((*edge_partition_dcs_nzd_vertices_)[i].data(), (*edge_partition_dcs_nzd_vertices_)[i].size()); } + if (dcs_nzd_range_bitmaps) { + (*dcs_nzd_range_bitmaps)[i] = + raft::device_span((*edge_partition_dcs_nzd_range_bitmaps_)[i].data(), + (*edge_partition_dcs_nzd_range_bitmaps_)[i].size()); + } } std::conditional_t{ this->number_of_vertices(), this->number_of_edges(), this->properties_, partition_, edge_partition_segment_offsets_, + edge_partition_hypersparse_degree_offsets_, local_sorted_unique_edge_srcs, local_sorted_unique_edge_src_chunk_start_offsets, local_sorted_unique_edge_src_chunk_size_, @@ -224,10 +238,13 @@ class graph_t>> edge_partition_dcs_nzd_vertices_{ std::nullopt}; + std::optional>> edge_partition_dcs_nzd_range_bitmaps_{ + std::nullopt}; partition_t partition_{}; // segment offsets within the vertex partition based on vertex degree std::vector edge_partition_segment_offsets_{}; + std::optional> edge_partition_hypersparse_degree_offsets_{}; // if valid, store row/column properties in key/value pairs (this saves memory if # unique edge // sources/destinations << V / major_comm_size|minor_comm_size). 
@@ -290,7 +307,11 @@ class graph_t(offsets_.data(), offsets_.size()), raft::device_span(indices_.data(), indices_.size()), graph_view_meta_t{ - this->number_of_vertices(), this->number_of_edges(), this->properties_, segment_offsets_}); + this->number_of_vertices(), + this->number_of_edges(), + this->properties_, + segment_offsets_, + hypersparse_degree_offsets_}); } private: @@ -299,6 +320,7 @@ class graph_t> segment_offsets_{}; + std::optional> hypersparse_degree_offsets_{}; }; template diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 866ab16ee97..6a03b9a6454 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -41,11 +41,13 @@ struct renumber_meta_t> edge_t number_of_edges{}; partition_t partition{}; std::vector edge_partition_segment_offsets{}; + std::optional> edge_partition_hypersparse_degree_offsets{}; }; template struct renumber_meta_t> { std::vector segment_offsets{}; + std::optional> hypersparse_degree_offsets{}; }; /** @@ -244,7 +246,7 @@ void unrenumber_int_vertices(raft::handle_t const& handle, * * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -284,7 +286,7 @@ std::enable_if_t unrenumber_local_int_edges( * * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. 
+ * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -346,7 +348,7 @@ void renumber_local_ext_vertices(raft::handle_t const& handle, * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam edge_type_t Type of edge types. Needs to be an integral type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -388,7 +390,7 @@ decompress_to_edgelist( * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -421,7 +423,7 @@ symmetrize_edgelist(raft::handle_t const& handle, * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. 
* @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -463,7 +465,7 @@ symmetrize_graph( * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -505,7 +507,7 @@ transpose_graph( * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -549,7 +551,7 @@ transpose_graph_storage( * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. 
* @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -625,7 +627,7 @@ void relabel(raft::handle_t const& handle, * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -678,7 +680,7 @@ extract_induced_subgraphs( * @tparam edge_type_t Type of edge type. Needs to be an integral type, currently only int32_t is * supported * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -743,7 +745,7 @@ create_graph_from_edgelist(raft::handle_t const& handle, * @tparam edge_type_t Type of edge type. 
Needs to be an integral type, currently only int32_t is * supported * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -807,7 +809,7 @@ create_graph_from_edgelist( * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -829,7 +831,7 @@ std::tuple, rmm::device_uvector> get_two * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -856,7 +858,7 @@ rmm::device_uvector compute_in_weight_sums( * @tparam edge_t Type of edge identifiers. 
Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -883,7 +885,7 @@ rmm::device_uvector compute_out_weight_sums( * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -910,7 +912,7 @@ weight_t compute_max_in_weight_sum( * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. 
CUDA stream, communicator, and @@ -937,7 +939,7 @@ weight_t compute_max_out_weight_sum( * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -963,7 +965,7 @@ weight_t compute_total_edge_weight( * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if - * true) as major indices in storing edges using a 2D sparse matrix. transposed. + * true) as major indices in storing edges using a 2D sparse matrix. * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -1114,7 +1116,8 @@ shuffle_external_vertex_value_pairs(raft::handle_t const& handle, * @param edge_ids Optional list of edge ids * @param edge_types Optional list of edge types * @return Tuple of vectors storing edge sources, destinations, optional weights, - * optional edge ids, optional edge types mapped to this GPU. + * optional edge ids, optional edge types mapped to this GPU and a vector storing the + * number of edges received from each GPU. 
*/ template std::tuple, diff --git a/cpp/include/cugraph/graph_view.hpp b/cpp/include/cugraph/graph_view.hpp index a2ff3166fa4..d109fbdac95 100644 --- a/cpp/include/cugraph/graph_view.hpp +++ b/cpp/include/cugraph/graph_view.hpp @@ -165,7 +165,12 @@ class partition_t { return vertex_partition_range_last(partition_idx) - vertex_partition_range_first(partition_idx); } - size_t number_of_local_edge_partitions() const { return minor_comm_size_; } + size_t number_of_local_edge_partitions() const { return static_cast(minor_comm_size_); } + size_t coinciding_local_edge_partition_idx() const + { + return static_cast(minor_comm_rank_); + } // the major range of coinciding_local_edge_partition_idx()'th local edge partition coincides + // with the local vertex partition range // major: source of the edge partition (if not transposed) or destination of the edge partition // (if transposed). @@ -249,9 +254,13 @@ double constexpr edge_partition_src_dst_property_values_kv_pair_fill_ratio_thres // use the hypersparse format (currently, DCSR or DCSC) for the vertices with their degrees smaller // than minor_comm_size * hypersparse_threshold_ratio, should be less than 1.0 double constexpr hypersparse_threshold_ratio = 0.5; -size_t constexpr low_degree_threshold{raft::warp_size()}; -size_t constexpr mid_degree_threshold{1024}; -size_t constexpr num_sparse_segments_per_vertex_partition{3}; +size_t constexpr low_degree_threshold{ + raft::warp_size()}; // belongs to the low degree segment if the global degree is smaller than + // this value. +size_t constexpr mid_degree_threshold{ + 1024}; // belongs to the medium degree segment if the global degree is smaller than this value, + // otherwise, belongs to the high degree segment. 
+size_t constexpr num_sparse_segments_per_vertex_partition{3}; // high, mid, low // Common for both graph_view_t & graph_t and both single-GPU & multi-GPU versions template @@ -313,6 +322,7 @@ struct graph_view_meta_t edge_partition_segment_offsets{}; + std::optional> edge_partition_hypersparse_degree_offsets{}; std::conditional_t>, @@ -356,6 +366,7 @@ struct graph_view_meta_t> segment_offsets{std::nullopt}; + std::optional> hypersparse_degree_offsets{std::nullopt}; }; // graph_view_t is a non-owning graph class (note that graph_t is an owning graph class) @@ -380,6 +391,8 @@ class graph_view_t> const& edge_partition_indices, std::optional>> const& edge_partition_dcs_nzd_vertices, + std::optional>> const& + edge_partition_dcs_nzd_range_bitmaps, graph_view_meta_t meta); std::vector vertex_partition_range_offsets() const @@ -552,6 +565,12 @@ class graph_view_t> local_vertex_partition_segment_offsets() const + { + auto partition_idx = partition_.coinciding_local_edge_partition_idx(); + return local_edge_partition_segment_offsets(partition_idx); + } + std::optional> local_edge_partition_segment_offsets( size_t partition_idx) const { @@ -563,6 +582,28 @@ class graph_view_t> local_vertex_partition_hypersparse_degree_offsets() const + { + auto partition_idx = partition_.coinciding_local_edge_partition_idx(); + return local_edge_partition_hypersparse_degree_offsets(partition_idx); + } + + std::optional> local_edge_partition_hypersparse_degree_offsets( + size_t partition_idx) const + { + auto num_degrees_per_vertex_partition = + edge_partition_hypersparse_degree_offsets_ + ? ((*edge_partition_hypersparse_degree_offsets_).size() / edge_partition_offsets_.size()) + : size_t{0}; + return edge_partition_hypersparse_degree_offsets_ + ? 
std::make_optional>( + (*edge_partition_hypersparse_degree_offsets_).begin() + + partition_idx * num_degrees_per_vertex_partition, + (*edge_partition_hypersparse_degree_offsets_).begin() + + (partition_idx + 1) * num_degrees_per_vertex_partition) + : std::nullopt; + } + vertex_partition_view_t local_vertex_partition_view() const { return vertex_partition_view_t(this->number_of_vertices(), @@ -605,6 +646,9 @@ class graph_view_t>> edge_partition_dcs_nzd_vertices_{}; + std::optional>> + edge_partition_dcs_nzd_range_bitmaps_{}; partition_t partition_{}; // segment offsets based on vertex degree std::vector edge_partition_segment_offsets_{}; + std::optional> edge_partition_hypersparse_degree_offsets_{}; // if valid, store source/destination property values in key/value pairs (this saves memory if # // unique edge sources/destinations << V / major_comm_size|minor_comm_size). @@ -903,6 +950,11 @@ class graph_view_t> local_vertex_partition_segment_offsets() const + { + return local_edge_partition_segment_offsets(size_t{0}); + } + std::optional> local_edge_partition_segment_offsets( size_t partition_idx = 0) const { @@ -910,6 +962,18 @@ class graph_view_t> local_vertex_partition_hypersparse_degree_offsets() const + { + return local_edge_partition_hypersparse_degree_offsets(size_t{0}); + } + + std::optional> local_edge_partition_hypersparse_degree_offsets( + size_t partition_idx = 0) const + { + assert(partition_idx == 0); + return hypersparse_degree_offsets_; + } + vertex_partition_view_t local_vertex_partition_view() const { return vertex_partition_view_t(this->number_of_vertices()); @@ -1050,6 +1114,7 @@ class graph_view_t> segment_offsets_{std::nullopt}; + std::optional> hypersparse_degree_offsets_{std::nullopt}; std::optional> edge_mask_view_{std::nullopt}; }; diff --git a/cpp/include/cugraph/partition_manager.hpp b/cpp/include/cugraph/partition_manager.hpp index 309b169e646..2eb210fb7cd 100644 --- a/cpp/include/cugraph/partition_manager.hpp +++ 
b/cpp/include/cugraph/partition_manager.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -71,6 +71,30 @@ class partition_manager { : (major_comm_rank * minor_comm_size + minor_comm_rank); } +#ifdef __CUDACC__ + __host__ __device__ +#endif + static int + compute_major_comm_rank_from_global_comm_rank(int major_comm_size, + int minor_comm_size, + int comm_rank) + { + return map_major_comm_to_gpu_row_comm ? comm_rank % major_comm_size + : comm_rank / minor_comm_size; + } + +#ifdef __CUDACC__ + __host__ __device__ +#endif + static int + compute_minor_comm_rank_from_global_comm_rank(int major_comm_size, + int minor_comm_size, + int comm_rank) + { + return map_major_comm_to_gpu_row_comm ? comm_rank / major_comm_size + : comm_rank % minor_comm_size; + } + #ifdef __CUDACC__ __host__ __device__ #endif diff --git a/cpp/include/cugraph/sampling_functions.hpp b/cpp/include/cugraph/sampling_functions.hpp index 3d41e954416..981c42135f6 100644 --- a/cpp/include/cugraph/sampling_functions.hpp +++ b/cpp/include/cugraph/sampling_functions.hpp @@ -306,7 +306,7 @@ struct sampling_flags_t { * @param edge_type_view Optional view object holding edge types for @p graph_view. * @param starting_vertices Device span of starting vertex IDs for the sampling. * In a multi-gpu context the starting vertices should be local to this GPU. - * @param starting_vertex_label_offsets Optional device span of labels associated with each starting + * @param starting_vertex_labels Optional device span of labels associated with each starting * vertex for the sampling. * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling * outputs of each vertex label. This should be the same on each rank. 
@@ -340,7 +340,7 @@ homogeneous_uniform_neighbor_sample( std::optional> edge_id_view, std::optional> edge_type_view, raft::device_span starting_vertices, - std::optional> starting_vertex_label_offsets, + std::optional> starting_vertex_labels, std::optional> label_to_output_comm_rank, raft::host_span fan_out, sampling_flags_t sampling_flags, @@ -385,7 +385,7 @@ homogeneous_uniform_neighbor_sample( * corresponding edge can never be selected. * @param starting_vertices Device span of starting vertex IDs for the sampling. * In a multi-gpu context the starting vertices should be local to this GPU. - * @param starting_vertex_label_offsets Optional device span of labels associated with each starting + * @param starting_vertex_labels Optional device span of labels associated with each starting * vertex for the sampling. * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling * outputs of each vertex label. This should be the same on each rank. @@ -421,7 +421,7 @@ homogeneous_biased_neighbor_sample( std::optional> edge_type_view, edge_property_view_t edge_bias_view, raft::device_span starting_vertices, - std::optional> starting_vertex_label_offsets, + std::optional> starting_vertex_labels, std::optional> label_to_output_comm_rank, raft::host_span fan_out, sampling_flags_t sampling_flags, @@ -462,7 +462,7 @@ homogeneous_biased_neighbor_sample( * @param edge_type_view Optional view object holding edge types for @p graph_view. * @param starting_vertices Device span of starting vertex IDs for the sampling. * In a multi-gpu context the starting vertices should be local to this GPU. - * @param starting_vertex_label_offsets Optional device span of labels associated with each starting + * @param starting_vertex_labels Optional device span of labels associated with each starting * vertex for the sampling. * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling * outputs of each vertex label. 
This should be the same on each rank. @@ -498,7 +498,7 @@ heterogeneous_uniform_neighbor_sample( std::optional> edge_id_view, std::optional> edge_type_view, raft::device_span starting_vertices, - std::optional> starting_vertex_label_offsets, + std::optional> starting_vertex_labels, std::optional> label_to_output_comm_rank, raft::host_span fan_out, edge_type_t num_edge_types, @@ -545,7 +545,7 @@ heterogeneous_uniform_neighbor_sample( * corresponding edge can never be selected. * @param starting_vertices Device span of starting vertex IDs for the sampling. * In a multi-gpu context the starting vertices should be local to this GPU. - * @param starting_vertex_label_offsets Optional device span of labels associated with each starting + * @param starting_vertex_labels Optional device span of labels associated with each starting * vertex for the sampling. * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling * outputs of each vertex label. This should be the same on each rank. 
@@ -583,7 +583,7 @@ heterogeneous_biased_neighbor_sample( std::optional> edge_type_view, edge_property_view_t edge_bias_view, raft::device_span starting_vertices, - std::optional> starting_vertex_label_offsets, + std::optional> starting_vertex_labels, std::optional> label_to_output_comm_rank, raft::host_span fan_out, edge_type_t num_edge_types, diff --git a/cpp/include/cugraph/utilities/dataframe_buffer.hpp b/cpp/include/cugraph/utilities/dataframe_buffer.hpp index a20613c65ef..6d47ec540da 100644 --- a/cpp/include/cugraph/utilities/dataframe_buffer.hpp +++ b/cpp/include/cugraph/utilities/dataframe_buffer.hpp @@ -82,6 +82,53 @@ auto allocate_dataframe_buffer(size_t buffer_size, rmm::cuda_stream_view stream_ std::make_index_sequence(), buffer_size, stream_view); } +template +struct dataframe_buffer_type { + using type = decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{})); +}; + +template +using dataframe_buffer_type_t = typename dataframe_buffer_type::type; + +template +std::optional> try_allocate_dataframe_buffer( + size_t buffer_size, rmm::cuda_stream_view stream_view) +{ + try { + return allocate_dataframe_buffer(buffer_size, stream_view); + } catch (std::exception const& e) { + return std::nullopt; + } +} + +template +struct dataframe_buffer_iterator_type { + using type = typename rmm::device_uvector::iterator; +}; + +template +struct dataframe_buffer_iterator_type> { + using type = thrust::zip_iterator::iterator...>>; +}; + +template +using dataframe_buffer_iterator_type_t = typename dataframe_buffer_iterator_type::type; + +template +struct dataframe_buffer_const_iterator_type { + using type = typename rmm::device_uvector::const_iterator; +}; + +template +struct dataframe_buffer_const_iterator_type> { + using type = + thrust::zip_iterator::const_iterator...>>; +}; + +template +using dataframe_buffer_const_iterator_type_t = + typename dataframe_buffer_const_iterator_type::type; + template void reserve_dataframe_buffer(BufferType& buffer, 
size_t new_buffer_capacity, @@ -206,30 +253,4 @@ auto get_dataframe_buffer_cend(BufferType& buffer) std::make_index_sequence::value>(), buffer); } -template -struct dataframe_buffer_value_type { - using type = void; -}; - -template -struct dataframe_buffer_value_type> { - using type = T; -}; - -template -struct dataframe_buffer_value_type...>> { - using type = thrust::tuple; -}; - -template -using dataframe_buffer_value_type_t = typename dataframe_buffer_value_type::type; - -template -struct dataframe_buffer_type { - using type = decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{})); -}; - -template -using dataframe_buffer_type_t = typename dataframe_buffer_type::type; - } // namespace cugraph diff --git a/cpp/include/cugraph/utilities/device_comm.hpp b/cpp/include/cugraph/utilities/device_comm.hpp index ffb0f7d9e5b..07de2d06466 100644 --- a/cpp/include/cugraph/utilities/device_comm.hpp +++ b/cpp/include/cugraph/utilities/device_comm.hpp @@ -55,7 +55,7 @@ auto iter_to_raw_ptr(thrust::detail::normal_iterator> iter } template -std::enable_if_t::value, void> +std::enable_if_t, void> device_isend_impl(raft::comms::comms_t const& comm, InputIterator input_first, size_t count, @@ -76,7 +76,7 @@ std::enable_if_t::value, void> device_isend_ raft::comms::request_t* request) { static_assert( - std::is_same::value_type, OutputValueType>::value); + std::is_same_v::value_type, OutputValueType>); comm.isend(iter_to_raw_ptr(input_first), count, dst, tag, request); } @@ -136,7 +136,7 @@ device_irecv_impl(raft::comms::comms_t const& comm, { static_assert( - std::is_same::value_type>::value); + std::is_same_v::value_type>); comm.irecv(iter_to_raw_ptr(output_first), count, src, tag, request); } @@ -200,7 +200,7 @@ device_sendrecv_impl(raft::comms::comms_t const& comm, { using value_type = typename std::iterator_traits::value_type; static_assert( - std::is_same::value_type, value_type>::value); + std::is_same_v::value_type, value_type>); 
comm.device_sendrecv(iter_to_raw_ptr(input_first), tx_count, dst, @@ -286,7 +286,7 @@ device_multicast_sendrecv_impl(raft::comms::comms_t const& comm, { using value_type = typename std::iterator_traits::value_type; static_assert( - std::is_same::value_type, value_type>::value); + std::is_same_v::value_type, value_type>); comm.device_multicast_sendrecv(iter_to_raw_ptr(input_first), tx_counts, tx_offsets, @@ -379,8 +379,8 @@ device_bcast_impl(raft::comms::comms_t const& comm, int root, rmm::cuda_stream_view stream_view) { - static_assert(std::is_same::value_type, - typename std::iterator_traits::value_type>::value); + static_assert(std::is_same_v::value_type, + typename std::iterator_traits::value_type>); comm.bcast( iter_to_raw_ptr(input_first), iter_to_raw_ptr(output_first), count, root, stream_view.value()); } @@ -440,8 +440,8 @@ device_allreduce_impl(raft::comms::comms_t const& comm, raft::comms::op_t op, rmm::cuda_stream_view stream_view) { - static_assert(std::is_same::value_type, - typename std::iterator_traits::value_type>::value); + static_assert(std::is_same_v::value_type, + typename std::iterator_traits::value_type>); comm.allreduce( iter_to_raw_ptr(input_first), iter_to_raw_ptr(output_first), count, op, stream_view.value()); } @@ -503,8 +503,8 @@ device_reduce_impl(raft::comms::comms_t const& comm, int root, rmm::cuda_stream_view stream_view) { - static_assert(std::is_same::value_type, - typename std::iterator_traits::value_type>::value); + static_assert(std::is_same_v::value_type, + typename std::iterator_traits::value_type>); comm.reduce(iter_to_raw_ptr(input_first), iter_to_raw_ptr(output_first), count, @@ -548,6 +548,62 @@ struct device_reduce_tuple_iterator_element_impl +std::enable_if_t::value, void> +device_allgather_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + rmm::cuda_stream_view stream_view) +{ + // no-op +} + +template +std::enable_if_t< + 
std::is_arithmetic::value_type>::value, + void> +device_allgather_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + rmm::cuda_stream_view stream_view) +{ + static_assert(std::is_same_v::value_type, + typename std::iterator_traits::value_type>); + comm.allgather( + iter_to_raw_ptr(input_first), iter_to_raw_ptr(output_first), sendcount, stream_view.value()); +} + +template +struct device_allgather_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + rmm::cuda_stream_view stream_view) const + { + device_allgather_impl(comm, + thrust::get(input_first.get_iterator_tuple()), + thrust::get(output_first.get_iterator_tuple()), + sendcount, + stream_view); + device_allgather_tuple_iterator_element_impl().run( + comm, input_first, output_first, sendcount, stream_view); + } +}; + +template +struct device_allgather_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + rmm::cuda_stream_view stream_view) const + { + } +}; + template std::enable_if_t::value, void> device_allgatherv_impl(raft::comms::comms_t const& comm, @@ -571,8 +627,8 @@ device_allgatherv_impl(raft::comms::comms_t const& comm, std::vector const& displacements, rmm::cuda_stream_view stream_view) { - static_assert(std::is_same::value_type, - typename std::iterator_traits::value_type>::value); + static_assert(std::is_same_v::value_type, + typename std::iterator_traits::value_type>); comm.allgatherv(iter_to_raw_ptr(input_first), iter_to_raw_ptr(output_first), recvcounts.data(), @@ -639,8 +695,8 @@ device_gatherv_impl(raft::comms::comms_t const& comm, int root, rmm::cuda_stream_view stream_view) { - static_assert(std::is_same::value_type, - typename std::iterator_traits::value_type>::value); + static_assert(std::is_same_v::value_type, + 
typename std::iterator_traits::value_type>); comm.gatherv(iter_to_raw_ptr(input_first), iter_to_raw_ptr(output_first), sendcount, @@ -1000,6 +1056,44 @@ device_reduce(raft::comms::comms_t const& comm, .run(comm, input_first, output_first, count, op, root, stream_view); } +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_allgather(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + rmm::cuda_stream_view stream_view) +{ + detail::device_allgather_impl(comm, input_first, output_first, sendcount, stream_view); +} + +template +std::enable_if_t< + is_thrust_tuple_of_arithmetic::value_type>::value && + is_thrust_tuple::value_type>::value, + void> +device_allgather(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + rmm::cuda_stream_view stream_view) +{ + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value_type>::value); + + size_t constexpr tuple_size = + thrust::tuple_size::value_type>::value; + + detail::device_allgather_tuple_iterator_element_impl() + .run(comm, input_first, output_first, sendcount, stream_view); +} + template std::enable_if_t< std::is_arithmetic::value_type>::value, diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh index 633dabe5b40..91a349007da 100644 --- a/cpp/include/cugraph/utilities/misc_utils.cuh +++ b/cpp/include/cugraph/utilities/misc_utils.cuh @@ -81,7 +81,7 @@ std::tuple, std::vector> compute_offset_aligned_ return std::make_tuple(h_chunk_offsets, h_element_offsets); } else { - return std::make_tuple(std::vector{{0, offsets.size() - 1}}, + return std::make_tuple(std::vector{{0, static_cast(offsets.size() - 1)}}, std::vector{{0, num_elements}}); } } diff --git a/cpp/include/cugraph/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh index 3cbd35b4bc3..98fa2cb1706 
100644 --- a/cpp/include/cugraph/utilities/shuffle_comm.cuh +++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,8 @@ namespace cugraph { namespace detail { +constexpr size_t cache_line_size = 128; + template struct compute_group_id_count_pair_t { GroupIdIterator group_id_first{}; @@ -76,6 +79,7 @@ inline std::tuple, std::vector> compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const& comm, rmm::device_uvector const& d_tx_value_counts, + bool drop_empty_ranks, rmm::cuda_stream_view stream_view) { auto const comm_size = comm.get_size(); @@ -111,28 +115,30 @@ compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const& comm, std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); - int num_tx_dst_ranks{0}; - int num_rx_src_ranks{0}; - for (int i = 0; i < comm_size; ++i) { - if (tx_counts[i] != 0) { - tx_counts[num_tx_dst_ranks] = tx_counts[i]; - tx_offsets[num_tx_dst_ranks] = tx_offsets[i]; - tx_dst_ranks[num_tx_dst_ranks] = tx_dst_ranks[i]; - ++num_tx_dst_ranks; - } - if (rx_counts[i] != 0) { - rx_counts[num_rx_src_ranks] = rx_counts[i]; - rx_offsets[num_rx_src_ranks] = rx_offsets[i]; - rx_src_ranks[num_rx_src_ranks] = rx_src_ranks[i]; - ++num_rx_src_ranks; + if (drop_empty_ranks) { + int num_tx_dst_ranks{0}; + int num_rx_src_ranks{0}; + for (int i = 0; i < comm_size; ++i) { + if (tx_counts[i] != 0) { + tx_counts[num_tx_dst_ranks] = tx_counts[i]; + tx_offsets[num_tx_dst_ranks] = tx_offsets[i]; + tx_dst_ranks[num_tx_dst_ranks] = tx_dst_ranks[i]; + ++num_tx_dst_ranks; + } + if (rx_counts[i] != 0) { + rx_counts[num_rx_src_ranks] = rx_counts[i]; + rx_offsets[num_rx_src_ranks] = rx_offsets[i]; + rx_src_ranks[num_rx_src_ranks] = rx_src_ranks[i]; + ++num_rx_src_ranks; + } } + tx_counts.resize(num_tx_dst_ranks); + tx_offsets.resize(num_tx_dst_ranks); + 
tx_dst_ranks.resize(num_tx_dst_ranks); + rx_counts.resize(num_rx_src_ranks); + rx_offsets.resize(num_rx_src_ranks); + rx_src_ranks.resize(num_rx_src_ranks); } - tx_counts.resize(num_tx_dst_ranks); - tx_offsets.resize(num_tx_dst_ranks); - tx_dst_ranks.resize(num_tx_dst_ranks); - rx_counts.resize(num_rx_src_ranks); - rx_offsets.resize(num_rx_src_ranks); - rx_src_ranks.resize(num_rx_src_ranks); return std::make_tuple(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks); } @@ -823,6 +829,8 @@ auto shuffle_values(raft::comms::comms_t const& comm, std::vector const& tx_value_counts, rmm::cuda_stream_view stream_view) { + using value_t = typename thrust::iterator_traits::value_type; + auto const comm_size = comm.get_size(); rmm::device_uvector d_tx_value_counts(comm_size, stream_view); @@ -836,11 +844,10 @@ auto shuffle_values(raft::comms::comms_t const& comm, std::vector rx_offsets{}; std::vector rx_src_ranks{}; std::tie(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks) = - detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream_view); + detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, true, stream_view); - auto rx_value_buffer = - allocate_dataframe_buffer::value_type>( - rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream_view); + auto rx_value_buffer = allocate_dataframe_buffer( + rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream_view); // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). device_multicast_sendrecv(comm, @@ -866,6 +873,236 @@ auto shuffle_values(raft::comms::comms_t const& comm, return std::make_tuple(std::move(rx_value_buffer), rx_counts); } +// Add gaps in the receive buffer to enforce that the sent data offset and the received data offset +// have the same alignment for every rank. 
This is faster assuming that @p alignment ensures cache +// line alignment in both send & receive buffer (tested with NCCL 2.23.4) +template +auto shuffle_values( + raft::comms::comms_t const& comm, + TxValueIterator tx_value_first, + std::vector const& tx_value_counts, + size_t alignment, // # elements + std::optional::value_type> fill_value, + rmm::cuda_stream_view stream_view) +{ + using value_t = typename thrust::iterator_traits::value_type; + + auto const comm_size = comm.get_size(); + + std::vector tx_value_displacements(tx_value_counts.size()); + std::exclusive_scan( + tx_value_counts.begin(), tx_value_counts.end(), tx_value_displacements.begin(), size_t{0}); + + std::vector tx_unaligned_counts(comm_size); + std::vector tx_displacements(comm_size); + std::vector tx_aligned_counts(comm_size); + std::vector tx_aligned_displacements(comm_size); + std::vector rx_unaligned_counts(comm_size); + std::vector rx_displacements(comm_size); + std::vector rx_aligned_counts(comm_size); + std::vector rx_aligned_displacements(comm_size); + std::vector tx_ranks(comm_size); + std::iota(tx_ranks.begin(), tx_ranks.end(), int{0}); + auto rx_ranks = tx_ranks; + for (size_t i = 0; i < tx_value_counts.size(); ++i) { + tx_unaligned_counts[i] = 0; + if (tx_value_displacements[i] % alignment != 0) { + tx_unaligned_counts[i] = + std::min(alignment - (tx_value_displacements[i] % alignment), tx_value_counts[i]); + } + tx_displacements[i] = tx_value_displacements[i]; + tx_aligned_counts[i] = tx_value_counts[i] - tx_unaligned_counts[i]; + tx_aligned_displacements[i] = tx_value_displacements[i] + tx_unaligned_counts[i]; + } + + rmm::device_uvector d_tx_unaligned_counts(tx_unaligned_counts.size(), stream_view); + rmm::device_uvector d_tx_aligned_counts(tx_aligned_counts.size(), stream_view); + rmm::device_uvector d_rx_unaligned_counts(rx_unaligned_counts.size(), stream_view); + rmm::device_uvector d_rx_aligned_counts(rx_aligned_counts.size(), stream_view); + 
raft::update_device(d_tx_unaligned_counts.data(), + tx_unaligned_counts.data(), + tx_unaligned_counts.size(), + stream_view); + raft::update_device( + d_tx_aligned_counts.data(), tx_aligned_counts.data(), tx_aligned_counts.size(), stream_view); + std::vector tx_counts(comm_size, size_t{1}); + std::vector tx_offsets(comm_size); + std::iota(tx_offsets.begin(), tx_offsets.end(), size_t{0}); + auto rx_counts = tx_counts; + auto rx_offsets = tx_offsets; + cugraph::device_multicast_sendrecv(comm, + d_tx_unaligned_counts.data(), + tx_counts, + tx_offsets, + tx_ranks, + d_rx_unaligned_counts.data(), + rx_counts, + rx_offsets, + rx_ranks, + stream_view); + cugraph::device_multicast_sendrecv(comm, + d_tx_aligned_counts.data(), + tx_counts, + tx_offsets, + tx_ranks, + d_rx_aligned_counts.data(), + rx_counts, + rx_offsets, + rx_ranks, + stream_view); + raft::update_host(rx_unaligned_counts.data(), + d_rx_unaligned_counts.data(), + d_rx_unaligned_counts.size(), + stream_view); + raft::update_host( + rx_aligned_counts.data(), d_rx_aligned_counts.data(), d_rx_aligned_counts.size(), stream_view); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view)); + size_t offset{0}; + for (size_t i = 0; i < rx_counts.size(); ++i) { + auto target_alignment = (alignment - rx_unaligned_counts[i]) % alignment; + auto cur_alignment = offset % alignment; + if (target_alignment >= cur_alignment) { + offset += target_alignment - cur_alignment; + } else { + offset += (target_alignment + alignment) - cur_alignment; + } + rx_displacements[i] = offset; + rx_aligned_displacements[i] = rx_displacements[i] + rx_unaligned_counts[i]; + offset = rx_aligned_displacements[i] + rx_aligned_counts[i]; + } + + auto rx_values = allocate_dataframe_buffer( + rx_aligned_displacements.back() + rx_aligned_counts.back(), stream_view); + if (fill_value) { + thrust::fill(rmm::exec_policy_nosync(stream_view), + get_dataframe_buffer_begin(rx_values), + get_dataframe_buffer_end(rx_values), + *fill_value); + } + 
cugraph::device_multicast_sendrecv(comm, + tx_value_first, + tx_unaligned_counts, + tx_displacements, + tx_ranks, + get_dataframe_buffer_begin(rx_values), + rx_unaligned_counts, + rx_displacements, + rx_ranks, + stream_view); + cugraph::device_multicast_sendrecv(comm, + tx_value_first, + tx_aligned_counts, + tx_aligned_displacements, + tx_ranks, + get_dataframe_buffer_begin(rx_values), + rx_aligned_counts, + rx_aligned_displacements, + rx_ranks, + stream_view); + + return std::make_tuple(std::move(rx_values), + tx_unaligned_counts, + tx_aligned_counts, + tx_displacements, + rx_unaligned_counts, + rx_aligned_counts, + rx_displacements); +} + +// this uses less memory than calling shuffle_values then sort & unique but requires comm.get_size() +// - 1 communication steps +template +auto shuffle_and_unique_segment_sorted_values( + raft::comms::comms_t const& comm, + TxValueIterator + segment_sorted_tx_value_first, // sorted within each segment (segment sizes: + // tx_value_counts[i], where i = [0, comm_size); and better be + // unique to reduce communication volume + std::vector const& tx_value_counts, + rmm::cuda_stream_view stream_view) +{ + using value_t = typename thrust::iterator_traits::value_type; + + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + auto sorted_unique_values = allocate_dataframe_buffer(0, stream_view); + if (comm_size == 1) { + resize_dataframe_buffer(sorted_unique_values, tx_value_counts[comm_rank], stream_view); + thrust::copy(rmm::exec_policy_nosync(stream_view), + segment_sorted_tx_value_first, + segment_sorted_tx_value_first + tx_value_counts[comm_rank], + get_dataframe_buffer_begin(sorted_unique_values)); + resize_dataframe_buffer( + sorted_unique_values, + thrust::distance(get_dataframe_buffer_begin(sorted_unique_values), + thrust::unique(rmm::exec_policy_nosync(stream_view), + get_dataframe_buffer_begin(sorted_unique_values), + get_dataframe_buffer_end(sorted_unique_values))), + stream_view); + }
else { + rmm::device_uvector d_tx_value_counts(comm_size, stream_view); + raft::update_device( + d_tx_value_counts.data(), tx_value_counts.data(), comm_size, stream_view.value()); + + std::vector tx_counts{}; + std::vector tx_offsets{}; + std::vector rx_counts{}; + std::vector rx_offsets{}; + std::tie(tx_counts, tx_offsets, std::ignore, rx_counts, rx_offsets, std::ignore) = + detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, false, stream_view); + + d_tx_value_counts.resize(0, stream_view); + d_tx_value_counts.shrink_to_fit(stream_view); + + for (int i = 1; i < comm_size; ++i) { + auto dst = (comm_rank + i) % comm_size; + auto src = + static_cast((static_cast(comm_rank) + static_cast(comm_size - i)) % + static_cast(comm_size)); + auto rx_sorted_values = allocate_dataframe_buffer(rx_counts[src], stream_view); + device_sendrecv(comm, + segment_sorted_tx_value_first + tx_offsets[dst], + tx_counts[dst], + dst, + get_dataframe_buffer_begin(rx_sorted_values), + rx_counts[src], + src, + stream_view); + auto merged_sorted_values = allocate_dataframe_buffer( + (i == 1 ? 
tx_counts[comm_rank] : size_dataframe_buffer(sorted_unique_values)) + + rx_counts[src], + stream_view); + if (i == 1) { + thrust::merge( + rmm::exec_policy_nosync(stream_view), + segment_sorted_tx_value_first + tx_offsets[comm_rank], + segment_sorted_tx_value_first + (tx_offsets[comm_rank] + tx_counts[comm_rank]), + get_dataframe_buffer_begin(rx_sorted_values), + get_dataframe_buffer_end(rx_sorted_values), + get_dataframe_buffer_begin(merged_sorted_values)); + } else { + thrust::merge(rmm::exec_policy_nosync(stream_view), + get_dataframe_buffer_begin(sorted_unique_values), + get_dataframe_buffer_end(sorted_unique_values), + get_dataframe_buffer_begin(rx_sorted_values), + get_dataframe_buffer_end(rx_sorted_values), + get_dataframe_buffer_begin(merged_sorted_values)); + } + resize_dataframe_buffer( + merged_sorted_values, + thrust::distance(get_dataframe_buffer_begin(merged_sorted_values), + thrust::unique(rmm::exec_policy_nosync(stream_view), + get_dataframe_buffer_begin(merged_sorted_values), + get_dataframe_buffer_end(merged_sorted_values))), + stream_view); + sorted_unique_values = std::move(merged_sorted_values); + } + } + shrink_to_fit_dataframe_buffer(sorted_unique_values, stream_view); + return sorted_unique_values; +} + template auto groupby_gpu_id_and_shuffle_values(raft::comms::comms_t const& comm, ValueIterator tx_value_first /* [INOUT */, @@ -889,7 +1126,7 @@ auto groupby_gpu_id_and_shuffle_values(raft::comms::comms_t const& comm, std::vector rx_offsets{}; std::vector rx_src_ranks{}; std::tie(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks) = - detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream_view); + detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, true, stream_view); auto rx_value_buffer = allocate_dataframe_buffer::value_type>( @@ -943,7 +1180,7 @@ auto groupby_gpu_id_and_shuffle_kv_pairs(raft::comms::comms_t const& comm, std::vector rx_offsets{}; std::vector rx_src_ranks{}; 
std::tie(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks) = - detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream_view); + detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, true, stream_view); rmm::device_uvector::value_type> rx_keys( rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream_view); diff --git a/cpp/include/cugraph/utilities/thrust_tuple_utils.hpp b/cpp/include/cugraph/utilities/thrust_tuple_utils.hpp index 2c36ed33359..29b9d132ef8 100644 --- a/cpp/include/cugraph/utilities/thrust_tuple_utils.hpp +++ b/cpp/include/cugraph/utilities/thrust_tuple_utils.hpp @@ -64,6 +64,18 @@ size_t sum_thrust_tuple_element_sizes(std::index_sequence) return (... + sizeof(typename thrust::tuple_element::type)); } +template +size_t min_thrust_tuple_element_sizes(std::index_sequence) +{ + return std::min(sizeof(typename thrust::tuple_element::type)...); +} + +template +size_t max_thrust_tuple_element_sizes(std::index_sequence) +{ + return std::max(sizeof(typename thrust::tuple_element::type)...); +} + template auto thrust_tuple_to_std_tuple(TupleType tup, std::index_sequence) { @@ -181,6 +193,20 @@ constexpr size_t sum_thrust_tuple_element_sizes() std::make_index_sequence::value>()); } +template +constexpr size_t min_thrust_tuple_element_sizes() +{ + return detail::min_thrust_tuple_element_sizes( + std::make_index_sequence::value>()); +} + +template +constexpr size_t max_thrust_tuple_element_sizes() +{ + return detail::max_thrust_tuple_element_sizes( + std::make_index_sequence::value>()); +} + template auto thrust_tuple_to_std_tuple(TupleType tup) { diff --git a/cpp/include/cugraph_c/graph_functions.h b/cpp/include/cugraph_c/graph_functions.h index ff7e439232a..964b2f2c8d6 100644 --- a/cpp/include/cugraph_c/graph_functions.h +++ b/cpp/include/cugraph_c/graph_functions.h @@ -104,6 +104,8 @@ cugraph_error_code_t cugraph_two_hop_neighbors( /** * @brief Opaque induced subgraph type + * + 
* @deprecated This API will be deleted, use cugraph_edgelist_t */ typedef struct { int32_t align_; @@ -112,6 +114,8 @@ typedef struct { /** * @brief Get the source vertex ids * + * @deprecated This API will be deleted, use cugraph_edgelist_get_sources + * * @param [in] induced_subgraph Opaque pointer to induced subgraph * @return type erased array view of source vertex ids */ @@ -121,6 +125,8 @@ cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get_sources( /** * @brief Get the destination vertex ids * + * @deprecated This API will be deleted, use cugraph_edgelist_get_destinations + * * @param [in] induced_subgraph Opaque pointer to induced subgraph * @return type erased array view of destination vertex ids */ @@ -130,6 +136,8 @@ cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get_destinatio /** * @brief Get the edge weights * + * @deprecated This API will be deleted, use cugraph_edgelist_get_edge_weights + * * @param [in] induced_subgraph Opaque pointer to induced subgraph * @return type erased array view of edge weights */ @@ -139,6 +147,8 @@ cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get_edge_weigh /** * @brief Get the edge ids * + * @deprecated This API will be deleted, use cugraph_edgelist_get_edge_ids + * * @param [in] induced_subgraph Opaque pointer to induced subgraph * @return type erased array view of edge ids */ @@ -148,6 +158,8 @@ cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get_edge_ids( /** * @brief Get the edge types * + * @deprecated This API will be deleted, use cugraph_edgelist_get_edge_type_ids + * * @param [in] induced_subgraph Opaque pointer to induced subgraph * @return type erased array view of edge types */ @@ -157,6 +169,8 @@ cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get_edge_type_ /** * @brief Get the subgraph offsets * + * @deprecated This API will be deleted, use cugraph_edgelist_get_edge_offsets + * * @param [in] induced_subgraph Opaque 
pointer to induced subgraph * @return type erased array view of subgraph identifiers */ @@ -166,6 +180,8 @@ cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get_subgraph_o /** * @brief Free induced subgraph * + * @deprecated This API will be deleted, use cugraph_edgelist_free + * * @param [in] induced subgraph Opaque pointer to induced subgraph */ void cugraph_induced_subgraph_result_free(cugraph_induced_subgraph_result_t* induced_subgraph); @@ -361,6 +377,92 @@ cugraph_type_erased_device_array_view_t* cugraph_degrees_result_get_out_degrees( */ void cugraph_degrees_result_free(cugraph_degrees_result_t* degrees_result); +/** + * @brief Opaque edgelist type + * + */ +typedef struct { + int32_t align_; +} cugraph_edgelist_t; + +/** + * @brief Get the source vertex ids + * + * @param [in] edgelist Opaque pointer to edgelist + * @return type erased array view of source vertex ids + */ +cugraph_type_erased_device_array_view_t* cugraph_edgelist_get_sources(cugraph_edgelist_t* edgelist); + +/** + * @brief Get the destination vertex ids + * + * @param [in] edgelist Opaque pointer to edgelist + * @return type erased array view of destination vertex ids + */ +cugraph_type_erased_device_array_view_t* cugraph_edgelist_get_destinations( + cugraph_edgelist_t* edgelist); + +/** + * @brief Get the edge weights + * + * @param [in] edgelist Opaque pointer to edgelist + * @return type erased array view of edge weights + */ +cugraph_type_erased_device_array_view_t* cugraph_edgelist_get_edge_weights( + cugraph_edgelist_t* edgelist); + +/** + * @brief Get the edge ids + * + * @param [in] edgelist Opaque pointer to edgelist + * @return type erased array view of edge ids + */ +cugraph_type_erased_device_array_view_t* cugraph_edgelist_get_edge_ids( + cugraph_edgelist_t* edgelist); + +/** + * @brief Get the edge types + * + * @param [in] edgelist Opaque pointer to edgelist + * @return type erased array view of edge types + */ +cugraph_type_erased_device_array_view_t* 
cugraph_edgelist_get_edge_type_ids( + cugraph_edgelist_t* edgelist); + +/** + * @brief Get the edge offsets + * + * @param [in] edgelist Opaque pointer to edgelist + * @return type erased array view of subgraph identifiers + */ +cugraph_type_erased_device_array_view_t* cugraph_edgelist_get_edge_offsets( + cugraph_edgelist_t* edgelist); + +/** + * @brief Free edgelist + * + * @param [in] edgelist Opaque pointer to edgelist + */ +void cugraph_edgelist_free(cugraph_edgelist_t* edgelist); + +/** + * @brief Construct the edge list from the graph view object. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Graph to operate on + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * true) + * @param [out] result Opaque pointer to edgelist + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_decompress_to_edgelist(const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + bool_t do_expensive_check, + cugraph_edgelist_t** result, + cugraph_error_t** error); + #ifdef __cplusplus } #endif diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index ef75e726d80..f048d338b97 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -551,6 +551,8 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( * @param [in] start_vertices Device array of start vertices for the sampling * @param [in] starting_vertex_label_offsets Device array of the offsets for each label in * the seed list. This parameter is only used with the retain_seeds option. + * @param [in] vertex_type_offsets Device array of the offsets for each vertex type in the + * graph. * @param [in] fan_out Host array defining the fan out at each step in the sampling * algorithm. 
We only support fan_out values of type INT32 * @param [in] num_edge_types Number of edge types where a value of 1 translates to homogeneous @@ -570,6 +572,7 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_type_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -598,6 +601,8 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( * @param [in] start_vertices Device array of start vertices for the sampling * @param [in] starting_vertex_label_offsets Device array of the offsets for each label in * the seed list. This parameter is only used with the retain_seeds option. + * @param [in] vertex_type_offsets Device array of the offsets for each vertex type in the + * graph. * @param [in] fan_out Host array defining the fan out at each step in the sampling * algorithm. 
We only support fan_out values of type INT32 * @param [in] num_edge_types Number of edge types where a value of 1 translates to homogeneous @@ -618,6 +623,7 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_type_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -735,6 +741,16 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_hop( cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_label_hop_offsets( const cugraph_sample_result_t* result); +/** + * @ingroup samplingC + * @brief Get the label-type-hop offsets from the sampling algorithm result + * + * @param [in] result The result from a sampling algorithm + * @return type erased array pointing to the label-type-hop offsets + */ +cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_label_type_hop_offsets( + const cugraph_sample_result_t* result); + /** * @ingroup samplingC * @brief Get the index from the sampling algorithm result diff --git a/cpp/libcugraph_etl/src/renumbering.cu b/cpp/libcugraph_etl/src/renumbering.cu index a2a20d5b2dc..0d47fad8000 100644 --- a/cpp/libcugraph_etl/src/renumbering.cu +++ b/cpp/libcugraph_etl/src/renumbering.cu @@ -730,7 +730,7 @@ __global__ static void select_unrenumber_string(str_hash_value* idx_to_col_row, } struct struct_sort_descending { - __host__ __device__ bool operator()(str_hash_value& a, str_hash_value& b) + __host__ __device__ bool operator()(str_hash_value a, str_hash_value b) { return (a.count_ > b.count_); } diff --git a/cpp/src/c_api/decompress_to_edgelist.cpp b/cpp/src/c_api/decompress_to_edgelist.cpp new file mode 100644 index 00000000000..75bf0c0fd60 --- /dev/null +++ 
b/cpp/src/c_api/decompress_to_edgelist.cpp @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "c_api/abstract_functor.hpp" +#include "c_api/core_result.hpp" +#include "c_api/edgelist.hpp" +#include "c_api/graph.hpp" +#include "c_api/resource_handle.hpp" +#include "c_api/utils.hpp" + +#include + +#include +#include +#include +#include + +#include + +namespace { + +struct decompress_to_edgelist_functor : public cugraph::c_api::abstract_functor { + raft::handle_t const& handle_; + cugraph::c_api::cugraph_graph_t* graph_{}; + + cugraph::c_api::cugraph_core_result_t const* core_result_{}; + bool do_expensive_check_{}; + cugraph::c_api::cugraph_edgelist_t* result_{}; + + decompress_to_edgelist_functor(cugraph_resource_handle_t const* handle, + cugraph_graph_t* graph, + bool do_expensive_check) + : abstract_functor(), + handle_(*reinterpret_cast(handle)->handle_), + graph_(reinterpret_cast(graph)), + do_expensive_check_(do_expensive_check) + { + } + + template + void operator()() + { + if constexpr (!cugraph::is_candidate::value) { + unsupported(); + } else { + if constexpr (store_transposed) { + error_code_ = cugraph::c_api:: + transpose_storage( + handle_, graph_, error_.get()); + if (error_code_ != CUGRAPH_SUCCESS) return; + } + + auto graph = + reinterpret_cast*>( + graph_->graph_); + + auto graph_view = graph->view(); + + auto edge_weights = reinterpret_cast, + 
weight_t>*>(graph_->edge_weights_); + + auto edge_ids = reinterpret_cast, + edge_t>*>(graph_->edge_ids_); + + auto edge_types = reinterpret_cast, + edge_type_type_t>*>(graph_->edge_types_); + + auto number_map = reinterpret_cast*>(graph_->number_map_); + + auto [result_src, result_dst, result_wgt, result_edge_id, result_edge_type] = + cugraph::decompress_to_edgelist( + handle_, + graph_view, + (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt, + (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, + (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, + (number_map != nullptr) ? std::make_optional>( + number_map->data(), number_map->size()) + : std::nullopt, + do_expensive_check_); + + result_ = new cugraph::c_api::cugraph_edgelist_t{ + new cugraph::c_api::cugraph_type_erased_device_array_t(result_src, graph_->vertex_type_), + new cugraph::c_api::cugraph_type_erased_device_array_t(result_dst, graph_->vertex_type_), + result_wgt ? new cugraph::c_api::cugraph_type_erased_device_array_t(*result_wgt, + graph_->weight_type_) + : NULL, + result_edge_id ? new cugraph::c_api::cugraph_type_erased_device_array_t(*result_edge_id, + graph_->edge_type_) + : NULL, + result_edge_type ? 
new cugraph::c_api::cugraph_type_erased_device_array_t( + *result_edge_type, graph_->edge_type_id_type_) + : NULL, + NULL}; + } + } +}; + +} // namespace + +extern "C" cugraph_error_code_t cugraph_decompress_to_edgelist( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + bool_t do_expensive_check, + cugraph_edgelist_t** result, + cugraph_error_t** error) +{ + decompress_to_edgelist_functor functor(handle, graph, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} diff --git a/cpp/src/c_api/edgelist.cpp b/cpp/src/c_api/edgelist.cpp new file mode 100644 index 00000000000..640b2bf2853 --- /dev/null +++ b/cpp/src/c_api/edgelist.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "c_api/edgelist.hpp" + +#include + +extern "C" cugraph_type_erased_device_array_view_t* cugraph_edgelist_get_sources( + cugraph_edgelist_t* edgelist) +{ + auto internal_pointer = reinterpret_cast(edgelist); + return reinterpret_cast(internal_pointer->src_->view()); +} + +extern "C" cugraph_type_erased_device_array_view_t* cugraph_edgelist_get_destinations( + cugraph_edgelist_t* edgelist) +{ + auto internal_pointer = reinterpret_cast(edgelist); + return reinterpret_cast(internal_pointer->dst_->view()); +} + +extern "C" cugraph_type_erased_device_array_view_t* cugraph_edgelist_get_edge_weights( + cugraph_edgelist_t* edgelist) +{ + auto internal_pointer = reinterpret_cast(edgelist); + return (internal_pointer->wgt_ == nullptr) + ? NULL + : reinterpret_cast( + internal_pointer->wgt_->view()); +} + +extern "C" cugraph_type_erased_device_array_view_t* cugraph_edgelist_get_edge_ids( + cugraph_edgelist_t* edgelist) +{ + auto internal_pointer = reinterpret_cast(edgelist); + return (internal_pointer->edge_ids_ == nullptr) + ? NULL + : reinterpret_cast( + internal_pointer->edge_ids_->view()); +} + +extern "C" cugraph_type_erased_device_array_view_t* cugraph_edgelist_get_edge_type_ids( + cugraph_edgelist_t* edgelist) +{ + auto internal_pointer = reinterpret_cast(edgelist); + return (internal_pointer->edge_type_ids_ == nullptr) + ? 
NULL + : reinterpret_cast( + internal_pointer->edge_type_ids_->view()); +} + +extern "C" cugraph_type_erased_device_array_view_t* cugraph_edgelist_get_edge_offsets( + cugraph_edgelist_t* edgelist) +{ + auto internal_pointer = reinterpret_cast(edgelist); + return reinterpret_cast( + internal_pointer->subgraph_offsets_->view()); +} + +extern "C" void cugraph_edgelist_free(cugraph_edgelist_t* edgelist) +{ + auto internal_pointer = reinterpret_cast(edgelist); + delete internal_pointer->src_; + delete internal_pointer->dst_; + delete internal_pointer->wgt_; + delete internal_pointer->edge_ids_; + delete internal_pointer->edge_type_ids_; + delete internal_pointer->subgraph_offsets_; + delete internal_pointer; +} diff --git a/cpp/src/c_api/edgelist.hpp b/cpp/src/c_api/edgelist.hpp new file mode 100644 index 00000000000..bc0f2d337f1 --- /dev/null +++ b/cpp/src/c_api/edgelist.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "c_api/array.hpp" + +namespace cugraph { +namespace c_api { + +struct cugraph_edgelist_t { + cugraph_type_erased_device_array_t* src_{}; + cugraph_type_erased_device_array_t* dst_{}; + cugraph_type_erased_device_array_t* wgt_{}; + cugraph_type_erased_device_array_t* edge_ids_{}; + cugraph_type_erased_device_array_t* edge_type_ids_{}; + cugraph_type_erased_device_array_t* subgraph_offsets_{}; +}; + +} // namespace c_api +} // namespace cugraph diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index be3a44d813a..37982eab82e 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -63,6 +63,7 @@ struct cugraph_sample_result_t { cugraph_type_erased_device_array_t* wgt_{nullptr}; cugraph_type_erased_device_array_t* hop_{nullptr}; cugraph_type_erased_device_array_t* label_hop_offsets_{nullptr}; + cugraph_type_erased_device_array_t* label_type_hop_offsets_{nullptr}; cugraph_type_erased_device_array_t* label_{nullptr}; cugraph_type_erased_device_array_t* renumber_map_{nullptr}; cugraph_type_erased_device_array_t* renumber_map_offsets_{nullptr}; @@ -403,6 +404,7 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct (label_hop_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_hop_offsets, SIZE_T) : nullptr, + nullptr, (edge_label) ? new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32) : nullptr, @@ -756,6 +758,7 @@ struct biased_neighbor_sampling_functor : public cugraph::c_api::abstract_functo (label_hop_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_hop_offsets, SIZE_T) : nullptr, + nullptr, (edge_label) ? 
new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32) : nullptr, @@ -777,7 +780,9 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { cugraph::c_api::cugraph_graph_t* graph_{nullptr}; cugraph::c_api::cugraph_edge_property_view_t const* edge_biases_{nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr}; - cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertex_offsets_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets_{ + nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* vertex_type_offsets_{nullptr}; cugraph::c_api::cugraph_type_erased_host_array_view_t const* fan_out_{nullptr}; int num_edge_types_{}; cugraph::c_api::cugraph_sampling_options_t options_{}; @@ -785,17 +790,19 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { bool do_expensive_check_{false}; cugraph::c_api::cugraph_sample_result_t* result_{nullptr}; - neighbor_sampling_functor(cugraph_resource_handle_t const* handle, - cugraph_rng_state_t* rng_state, - cugraph_graph_t* graph, - cugraph_edge_property_view_t const* edge_biases, - cugraph_type_erased_device_array_view_t const* start_vertices, - cugraph_type_erased_device_array_view_t const* start_vertex_offsets, - cugraph_type_erased_host_array_view_t const* fan_out, - int num_edge_types, - cugraph::c_api::cugraph_sampling_options_t options, - bool is_biased, - bool do_expensive_check) + neighbor_sampling_functor( + cugraph_resource_handle_t const* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + cugraph_edge_property_view_t const* edge_biases, + cugraph_type_erased_device_array_view_t const* start_vertices, + cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets, + cugraph_type_erased_device_array_view_t const* vertex_type_offsets, + cugraph_type_erased_host_array_view_t const* 
fan_out, + int num_edge_types, + cugraph::c_api::cugraph_sampling_options_t options, + bool is_biased, + bool do_expensive_check) : abstract_functor(), handle_(*reinterpret_cast(handle)->handle_), rng_state_(reinterpret_cast(rng_state)), @@ -805,9 +812,12 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { start_vertices_( reinterpret_cast( start_vertices)), - start_vertex_offsets_( + starting_vertex_label_offsets_( + reinterpret_cast( + starting_vertex_label_offsets)), + vertex_type_offsets_( reinterpret_cast( - start_vertex_offsets)), + vertex_type_offsets)), fan_out_( reinterpret_cast(fan_out)), num_edge_types_(num_edge_types), @@ -879,17 +889,17 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { std::optional> renumbered_and_sorted_edge_id_renumber_map_label_type_offsets(std::nullopt); - if (start_vertex_offsets_ != nullptr) { + if (starting_vertex_label_offsets_ != nullptr) { // Retrieve the start_vertex_labels start_vertex_labels = cugraph::detail::convert_starting_vertex_label_offsets_to_labels( handle_, - raft::device_span{start_vertex_offsets_->as_type(), - start_vertex_offsets_->size_}); + raft::device_span{starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}); // Get the number of labels on each GPU if constexpr (multi_gpu) { - auto num_local_labels = start_vertex_offsets_->size_ - 1; + auto num_local_labels = starting_vertex_label_offsets_->size_ - 1; auto global_labels = cugraph::host_scalar_allgather( handle_.get_comms(), num_local_labels, handle_.get_stream()); @@ -897,7 +907,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { std::exclusive_scan( global_labels.begin(), global_labels.end(), global_labels.begin(), label_t{0}); - // Compute the global start_vertex_label_offsets + // Compute the global starting_vertex_label_offsets cugraph::detail::transform_increment_ints( raft::device_span{(*start_vertex_labels).data(), @@ -996,7 
+1006,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, (edge_biases != nullptr) ? *edge_biases : edge_weights->view(), raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_offsets_ != nullptr) + (starting_vertex_label_offsets_ != nullptr) ? std::make_optional>((*start_vertex_labels).data(), (*start_vertex_labels).size()) : std::nullopt, @@ -1020,7 +1030,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_offsets_ != nullptr) + (starting_vertex_label_offsets_ != nullptr) ? std::make_optional>((*start_vertex_labels).data(), (*start_vertex_labels).size()) : std::nullopt, @@ -1048,7 +1058,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, (edge_biases != nullptr) ? *edge_biases : edge_weights->view(), raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_offsets_ != nullptr) + (starting_vertex_label_offsets_ != nullptr) ? std::make_optional>((*start_vertex_labels).data(), (*start_vertex_labels).size()) : std::nullopt, @@ -1071,7 +1081,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_offsets_ != nullptr) + (starting_vertex_label_offsets_ != nullptr) ? 
std::make_optional>((*start_vertex_labels).data(), (*start_vertex_labels).size()) : std::nullopt, @@ -1108,6 +1118,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { std::optional> major_offsets{std::nullopt}; std::optional> label_hop_offsets{std::nullopt}; + std::optional> label_type_hop_offsets{std::nullopt}; std::optional> renumber_map{std::nullopt}; std::optional> renumber_map_offsets{std::nullopt}; @@ -1125,21 +1136,129 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { } if (options_.renumber_results_) { - if (num_edge_types_ == 1) { // homogeneous renumbering - if (options_.compression_type_ == cugraph_compression_type_t::COO) { - // COO + if (src.size() > 0) { // Only renumber if there are edgelist to renumber + if (num_edge_types_ == 1) { // homogeneous renumbering + if (options_.compression_type_ == cugraph_compression_type_t::COO) { + // COO + + rmm::device_uvector output_majors(0, handle_.get_stream()); + rmm::device_uvector output_renumber_map(0, handle_.get_stream()); + std::tie(output_majors, + minors, + wgt, + edge_id, + edge_type, + label_hop_offsets, + output_renumber_map, + renumber_map_offsets) = + cugraph::renumber_and_sort_sampled_edgelist( + handle_, + std::move(src), + std::move(dst), + std::move(wgt), + std::move(edge_id), + std::move(edge_type), + std::move(hop), + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertices_->as_type(), start_vertices_->size_}) + : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}) + : std::nullopt, + offsets ? std::make_optional( + raft::device_span{offsets->data(), offsets->size()}) + : std::nullopt, + offsets ? (*offsets).size() - 1 : size_t{1}, + hop ? 
fan_out_->size_ : size_t{1}, + src_is_major, + do_expensive_check_); + + majors.emplace(std::move(output_majors)); + renumber_map.emplace(std::move(output_renumber_map)); + } else { + // (D)CSC, (D)CSR + + bool doubly_compress = + (options_.compression_type_ == cugraph_compression_type_t::DCSR) || + (options_.compression_type_ == cugraph_compression_type_t::DCSC); + + rmm::device_uvector output_major_offsets(0, handle_.get_stream()); + rmm::device_uvector output_renumber_map(0, handle_.get_stream()); + + std::tie(majors, + output_major_offsets, + minors, + wgt, + edge_id, + edge_type, + label_hop_offsets, + output_renumber_map, + renumber_map_offsets) = + cugraph::renumber_and_compress_sampled_edgelist( + handle_, + std::move(src), + std::move(dst), + std::move(wgt), + std::move(edge_id), + std::move(edge_type), + std::move(hop), + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertices_->as_type(), start_vertices_->size_}) + : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}) + : std::nullopt, + offsets ? std::make_optional( + raft::device_span{offsets->data(), offsets->size()}) + : std::nullopt, + edge_label ? (*offsets).size() - 1 : size_t{1}, // FIXME: update edge_label + hop ? fan_out_->size_ : size_t{1}, + src_is_major, + options_.compress_per_hop_, + doubly_compress, + do_expensive_check_); + + renumber_map.emplace(std::move(output_renumber_map)); + major_offsets.emplace(std::move(output_major_offsets)); + } + + // These are now represented by label_hop_offsets + hop.reset(); + offsets.reset(); + + } else { // heterogeneous renumbering + + rmm::device_uvector vertex_type_offsets(2, handle_.get_stream()); + + if (vertex_type_offsets_ == nullptr) { + // If no 'vertex_type_offsets' is provided, all vertices are assumed to have + // a vertex type of value 1. 
+ cugraph::detail::stride_fill(handle_.get_stream(), + vertex_type_offsets.begin(), + vertex_type_offsets.size(), + vertex_t{0}, + vertex_t{graph_view.local_vertex_partition_range_size()} + + ); + } rmm::device_uvector output_majors(0, handle_.get_stream()); rmm::device_uvector output_renumber_map(0, handle_.get_stream()); + std::tie(output_majors, minors, wgt, edge_id, - edge_type, - label_hop_offsets, + label_type_hop_offsets, // Contains information about the type and hop offsets output_renumber_map, - renumber_map_offsets) = - cugraph::renumber_and_sort_sampled_edgelist( + renumber_map_offsets, + renumbered_and_sorted_edge_id_renumber_map, + renumbered_and_sorted_edge_id_renumber_map_label_type_offsets) = + cugraph::heterogeneous_renumber_and_sort_sampled_edgelist( handle_, std::move(src), std::move(dst), @@ -1151,140 +1270,47 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, - offsets ? (*offsets).size() - 1 : size_t{1}, - hop ? 
fan_out_->size_ : size_t{1}, - src_is_major, - do_expensive_check_); - majors.emplace(std::move(output_majors)); - renumber_map.emplace(std::move(output_renumber_map)); - } else { - // (D)CSC, (D)CSR - - bool doubly_compress = - (options_.compression_type_ == cugraph_compression_type_t::DCSR) || - (options_.compression_type_ == cugraph_compression_type_t::DCSC); - - rmm::device_uvector output_major_offsets(0, handle_.get_stream()); - rmm::device_uvector output_renumber_map(0, handle_.get_stream()); - - std::tie(majors, - output_major_offsets, - minors, - wgt, - edge_id, - edge_type, - label_hop_offsets, - output_renumber_map, - renumber_map_offsets) = - cugraph::renumber_and_compress_sampled_edgelist( - handle_, - std::move(src), - std::move(dst), - std::move(wgt), - std::move(edge_id), - std::move(edge_type), - std::move(hop), - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - start_vertices_->as_type(), start_vertices_->size_}) - : std::nullopt, - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) - : std::nullopt, - offsets ? std::make_optional( - raft::device_span{offsets->data(), offsets->size()}) - : std::nullopt, - edge_label ? (*offsets).size() - 1 : size_t{1}, // FIXME: update edge_label - hop ? fan_out_->size_ : size_t{1}, + (vertex_type_offsets_ != nullptr) + ? raft::device_span{vertex_type_offsets_->as_type(), + vertex_type_offsets_->size_} + : raft::device_span{vertex_type_offsets.data(), + vertex_type_offsets.size()}, + + edge_label ? (*offsets).size() - 1 : size_t{1}, + hop ? (((fan_out_->size_ % num_edge_types_) == 0) + ? (fan_out_->size_ / num_edge_types_) + : ((fan_out_->size_ / num_edge_types_) + 1)) + : size_t{1}, + (vertex_type_offsets_ != nullptr) ? vertex_type_offsets_->size_ - 1 + : vertex_type_offsets.size() - 1, + + // num_vertex_type is by default 1 if 'vertex_type_offsets' is not provided. 
+ num_edge_types_, src_is_major, - options_.compress_per_hop_, - doubly_compress, do_expensive_check_); + if (edge_type) { + (*edge_type) + .resize(raft::device_span{(*label_type_hop_offsets).data(), + (*label_type_hop_offsets).size()} + .back() - + 1, + handle_.get_stream()); + cugraph::detail::sequence_fill( + handle_.get_stream(), (*edge_type).begin(), (*edge_type).size(), edge_type_t{0}); + } + majors.emplace(std::move(output_majors)); + // FIXME: Need to update renumber_map because default values are being passed renumber_map.emplace(std::move(output_renumber_map)); - major_offsets.emplace(std::move(output_major_offsets)); } - - // These are now represented by label_hop_offsets - hop.reset(); - offsets.reset(); - - } else { // heterogeneous renumbering - - rmm::device_uvector vertex_type_offsets( - graph_view.local_vertex_partition_range_size(), handle_.get_stream()); - - cugraph::detail::sequence_fill(handle_.get_stream(), - vertex_type_offsets.begin(), - vertex_type_offsets.size(), - vertex_t{0} // FIXME: Update array - ); - - rmm::device_uvector output_majors(0, handle_.get_stream()); - rmm::device_uvector output_renumber_map(0, handle_.get_stream()); - - // extract the edge_type from label_type_hop_offsets - std::optional> label_type_hop_offsets{std::nullopt}; - std::tie(output_majors, - minors, - wgt, - edge_id, - label_type_hop_offsets, // Contains information about the type and hop offsets - output_renumber_map, - (*renumber_map_offsets), - renumbered_and_sorted_edge_id_renumber_map, - renumbered_and_sorted_edge_id_renumber_map_label_type_offsets) = - cugraph::heterogeneous_renumber_and_sort_sampled_edgelist( - handle_, - std::move(src), - std::move(dst), - std::move(wgt), - std::move(edge_id), - std::move(edge_type), - std::move(hop), - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - start_vertices_->as_type(), start_vertices_->size_}) - : std::nullopt, - options_.retain_seeds_ - ? 
std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) - : std::nullopt, - offsets ? std::make_optional( - raft::device_span{offsets->data(), offsets->size()}) - : std::nullopt, - raft::device_span{vertex_type_offsets.data(), - vertex_type_offsets.size()}, - - edge_label ? (*offsets).size() - 1 : size_t{1}, - hop ? fan_out_->size_ : size_t{1}, - size_t{1}, - num_edge_types_, - src_is_major, - do_expensive_check_); - if (edge_type) { - (*edge_type) - .resize(raft::device_span{(*label_type_hop_offsets).data(), - (*label_type_hop_offsets).size()} - .back() - - 1, - handle_.get_stream()); - cugraph::detail::sequence_fill( - handle_.get_stream(), (*edge_type).begin(), (*edge_type).size(), edge_type_t{0}); - } - - majors.emplace(std::move(output_majors)); - // FIXME: Need to update renumber_map because default values are being passed - renumber_map.emplace(std::move(output_renumber_map)); } } else { @@ -1339,6 +1365,9 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (label_hop_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_hop_offsets, SIZE_T) : nullptr, + (label_type_hop_offsets) + ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_type_hop_offsets, SIZE_T) + : nullptr, (edge_label) ? new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32) : nullptr, @@ -1557,6 +1586,16 @@ extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_la : NULL; } +extern "C" cugraph_type_erased_device_array_view_t* +cugraph_sample_result_get_label_type_hop_offsets(const cugraph_sample_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return internal_pointer->label_type_hop_offsets_ != nullptr + ? 
reinterpret_cast( + internal_pointer->label_type_hop_offsets_->view()) + : NULL; +} + extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_index( const cugraph_sample_result_t* result) { @@ -2018,7 +2057,8 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( cugraph_rng_state_t* rng_state, cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* start_vertex_offsets, + const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_type_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -2029,17 +2069,17 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( auto options_cpp = *reinterpret_cast(options); // FIXME: Should we maintain this contition? - CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (starting_vertex_label_offsets != nullptr), CUGRAPH_INVALID_INPUT, - "must specify start_vertex_offsets if retain_seeds is true", + "must specify starting_vertex_label_offsets if retain_seeds is true", *error); - CAPI_EXPECTS((start_vertex_offsets == nullptr) || + CAPI_EXPECTS((starting_vertex_label_offsets == nullptr) || (reinterpret_cast( - start_vertex_offsets) + starting_vertex_label_offsets) ->type_ == SIZE_T), CUGRAPH_INVALID_INPUT, - "start_vertex_offsets should be of type size_t", + "starting_vertex_label_offsets should be of type size_t", *error); CAPI_EXPECTS( @@ -2062,7 +2102,8 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( graph, nullptr, start_vertices, - start_vertex_offsets, + starting_vertex_label_offsets, + vertex_type_offsets, fan_out, num_edge_types, std::move(options_cpp), @@ -2077,7 +2118,8 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( 
cugraph_graph_t* graph, const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* start_vertex_offsets, + const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_type_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -2095,17 +2137,17 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( *error); // FIXME: Should we maintain this contition? - CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (starting_vertex_label_offsets != nullptr), CUGRAPH_INVALID_INPUT, - "must specify start_vertex_offsets if retain_seeds is true", + "must specify starting_vertex_label_offsets if retain_seeds is true", *error); - CAPI_EXPECTS((start_vertex_offsets == nullptr) || + CAPI_EXPECTS((starting_vertex_label_offsets == nullptr) || (reinterpret_cast( - start_vertex_offsets) + starting_vertex_label_offsets) ->type_ == SIZE_T), CUGRAPH_INVALID_INPUT, - "start_vertex_offsets should be of type size_t", + "starting_vertex_label_offsets should be of type size_t", *error); CAPI_EXPECTS( @@ -2128,7 +2170,8 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( graph, edge_biases, start_vertices, - start_vertex_offsets, + starting_vertex_label_offsets, + vertex_type_offsets, fan_out, num_edge_types, std::move(options_cpp), @@ -2142,7 +2185,7 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( cugraph_rng_state_t* rng_state, cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* start_vertex_offsets, // RENAME? 
+ const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, const cugraph_sampling_options_t* options, bool_t do_expensive_check, @@ -2152,17 +2195,17 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( auto options_cpp = *reinterpret_cast(options); // FIXME: Should we maintain this contition? - CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (starting_vertex_label_offsets != nullptr), CUGRAPH_INVALID_INPUT, - "must specify start_vertex_offsets if retain_seeds is true", + "must specify starting_vertex_label_offsets if retain_seeds is true", *error); - CAPI_EXPECTS((start_vertex_offsets == nullptr) || + CAPI_EXPECTS((starting_vertex_label_offsets == nullptr) || (reinterpret_cast( - start_vertex_offsets) + starting_vertex_label_offsets) ->type_ == SIZE_T), CUGRAPH_INVALID_INPUT, - "start_vertex_offsets should be of type size_t", + "starting_vertex_label_offsets should be of type size_t", *error); CAPI_EXPECTS( @@ -2185,7 +2228,8 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( graph, nullptr, start_vertices, - start_vertex_offsets, + starting_vertex_label_offsets, + nullptr, fan_out, 1, // num_edge_types std::move(options_cpp), @@ -2200,7 +2244,7 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( cugraph_graph_t* graph, const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* start_vertex_offsets, + const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, const cugraph_sampling_options_t* options, bool_t do_expensive_check, @@ -2217,17 +2261,17 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( *error); // FIXME: Should we maintain this contition? 
- CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (starting_vertex_label_offsets != nullptr), CUGRAPH_INVALID_INPUT, - "must specify start_vertex_offsets if retain_seeds is true", + "must specify starting_vertex_label_offsets if retain_seeds is true", *error); - CAPI_EXPECTS((start_vertex_offsets == nullptr) || + CAPI_EXPECTS((starting_vertex_label_offsets == nullptr) || (reinterpret_cast( - start_vertex_offsets) + starting_vertex_label_offsets) ->type_ == SIZE_T), CUGRAPH_INVALID_INPUT, - "start_vertex_offsets should be of type size_t", + "starting_vertex_label_offsets should be of type size_t", *error); CAPI_EXPECTS( @@ -2250,7 +2294,8 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( graph, edge_biases, start_vertices, - start_vertex_offsets, + starting_vertex_label_offsets, + nullptr, fan_out, 1, // num_edge_types std::move(options_cpp), diff --git a/cpp/src/centrality/betweenness_centrality_impl.cuh b/cpp/src/centrality/betweenness_centrality_impl.cuh index 8ae49ed207c..88ef3987a03 100644 --- a/cpp/src/centrality/betweenness_centrality_impl.cuh +++ b/cpp/src/centrality/betweenness_centrality_impl.cuh @@ -23,7 +23,7 @@ #include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" #include "prims/transform_e.cuh" #include "prims/transform_reduce_v.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" #include "prims/update_edge_src_dst_property.cuh" #include "prims/update_v_frontier.cuh" #include "prims/vertex_frontier.cuh" @@ -133,15 +133,15 @@ std::tuple, rmm::device_uvector> brandes_b update_edge_src_property(handle, graph_view, sigmas.begin(), src_sigmas.mutable_view()); update_edge_dst_property(handle, graph_view, distances.begin(), dst_distances.mutable_view()); - auto [new_frontier, new_sigma] = - transform_reduce_v_frontier_outgoing_e_by_dst(handle, - graph_view, - 
vertex_frontier.bucket(bucket_idx_cur), - src_sigmas.view(), - dst_distances.view(), - cugraph::edge_dummy_property_t{}.view(), - brandes_e_op_t{}, - reduce_op::plus()); + auto [new_frontier, new_sigma] = cugraph::transform_reduce_v_frontier_outgoing_e_by_dst( + handle, + graph_view, + vertex_frontier.bucket(bucket_idx_cur), + src_sigmas.view(), + dst_distances.view(), + cugraph::edge_dummy_property_t{}.view(), + brandes_e_op_t{}, + reduce_op::plus()); update_v_frontier(handle, graph_view, diff --git a/cpp/src/community/approx_weighted_matching_impl.cuh b/cpp/src/community/approx_weighted_matching_impl.cuh index a0ccfa52ffc..869ed4e7ae6 100644 --- a/cpp/src/community/approx_weighted_matching_impl.cuh +++ b/cpp/src/community/approx_weighted_matching_impl.cuh @@ -243,11 +243,12 @@ std::tuple, weight_t> approximate_weighted_matchin major_comm_size, minor_comm_size}; - candidates_of_candidates = cugraph::collect_values_for_keys(handle, + candidates_of_candidates = cugraph::collect_values_for_keys(comm, target_candidate_map.view(), candidates.begin(), candidates.end(), - vertex_to_gpu_id_op); + vertex_to_gpu_id_op, + handle.get_stream()); } else { candidates_of_candidates.resize(candidates.size(), handle.get_stream()); diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh index e17abdb3703..18fb3fdb251 100644 --- a/cpp/src/community/detail/common_methods.cuh +++ b/cpp/src/community/detail/common_methods.cuh @@ -289,11 +289,12 @@ rmm::device_uvector update_clustering_by_delta_modularity( invalid_vertex_id::value, std::numeric_limits::max(), handle.get_stream()); - vertex_cluster_weights_v = cugraph::collect_values_for_keys(handle, + vertex_cluster_weights_v = cugraph::collect_values_for_keys(comm, cluster_key_weight_map.view(), next_clusters_v.begin(), next_clusters_v.end(), - vertex_to_gpu_id_op); + vertex_to_gpu_id_op, + handle.get_stream()); src_cluster_weights = edge_src_property_t, weight_t>(handle, diff --git 
a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index 62b66ed5f41..d69c1463edf 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -181,11 +181,12 @@ refine_clustering( comm_size, major_comm_size, minor_comm_size}; vertex_louvain_cluster_weights = - cugraph::collect_values_for_keys(handle, + cugraph::collect_values_for_keys(comm, cluster_key_weight_map.view(), louvain_assignment_of_vertices.begin(), louvain_assignment_of_vertices.end(), - vertex_to_gpu_id_op); + vertex_to_gpu_id_op, + handle.get_stream()); } else { vertex_louvain_cluster_weights.resize(louvain_assignment_of_vertices.size(), @@ -473,11 +474,12 @@ refine_clustering( // comm_size, major_comm_size, minor_comm_size}; louvain_of_leiden_keys_used_in_edge_reduction = - cugraph::collect_values_for_keys(handle, + cugraph::collect_values_for_keys(comm, leiden_to_louvain_map.view(), leiden_keys_used_in_edge_reduction.begin(), leiden_keys_used_in_edge_reduction.end(), - vertex_to_gpu_id_op); + vertex_to_gpu_id_op, + handle.get_stream()); } else { louvain_of_leiden_keys_used_in_edge_reduction.resize( leiden_keys_used_in_edge_reduction.size(), handle.get_stream()); @@ -864,11 +866,12 @@ refine_clustering( // comm_size, major_comm_size, minor_comm_size}; lovain_of_leiden_cluster_keys = - cugraph::collect_values_for_keys(handle, + cugraph::collect_values_for_keys(comm, leiden_to_louvain_map.view(), leiden_keys_to_read_louvain.begin(), leiden_keys_to_read_louvain.end(), - vertex_to_gpu_id_op); + vertex_to_gpu_id_op, + handle.get_stream()); } else { lovain_of_leiden_cluster_keys.resize(leiden_keys_to_read_louvain.size(), handle.get_stream()); diff --git a/cpp/src/components/weakly_connected_components_impl.cuh b/cpp/src/components/weakly_connected_components_impl.cuh index 468f4f7280f..219bc3c4d1d 100644 --- a/cpp/src/components/weakly_connected_components_impl.cuh +++ b/cpp/src/components/weakly_connected_components_impl.cuh @@ 
-16,7 +16,7 @@ #pragma once #include "prims/fill_edge_src_dst_property.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" #include "prims/update_edge_src_dst_property.cuh" #include "prims/update_v_frontier.cuh" #include "prims/vertex_frontier.cuh" @@ -550,24 +550,25 @@ void weakly_connected_components_impl(raft::handle_t const& handle, auto old_num_edge_inserts = num_edge_inserts.value(handle.get_stream()); resize_dataframe_buffer(edge_buffer, old_num_edge_inserts + max_pushes, handle.get_stream()); - auto new_frontier_tagged_vertex_buffer = transform_reduce_v_frontier_outgoing_e_by_dst( - handle, - level_graph_view, - vertex_frontier.bucket(bucket_idx_cur), - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_dummy_property_t{}.view(), - e_op_t{ - GraphViewType::is_multi_gpu - ? detail::edge_partition_endpoint_property_device_view_t( - edge_dst_components.mutable_view()) - : detail::edge_partition_endpoint_property_device_view_t( - detail::edge_minor_property_view_t(level_components, - vertex_t{0})), - level_graph_view.local_edge_partition_dst_range_first(), - get_dataframe_buffer_begin(edge_buffer), - num_edge_inserts.data()}, - reduce_op::null()); + auto new_frontier_tagged_vertex_buffer = + cugraph::transform_reduce_v_frontier_outgoing_e_by_dst( + handle, + level_graph_view, + vertex_frontier.bucket(bucket_idx_cur), + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_dummy_property_t{}.view(), + e_op_t{ + GraphViewType::is_multi_gpu + ? 
detail::edge_partition_endpoint_property_device_view_t( + edge_dst_components.mutable_view()) + : detail::edge_partition_endpoint_property_device_view_t( + detail::edge_minor_property_view_t(level_components, + vertex_t{0})), + level_graph_view.local_edge_partition_dst_range_first(), + get_dataframe_buffer_begin(edge_buffer), + num_edge_inserts.data()}, + reduce_op::null()); update_v_frontier(handle, level_graph_view, diff --git a/cpp/src/cores/core_number_impl.cuh b/cpp/src/cores/core_number_impl.cuh index d807ccac5a5..a2b6f6430f0 100644 --- a/cpp/src/cores/core_number_impl.cuh +++ b/cpp/src/cores/core_number_impl.cuh @@ -16,7 +16,7 @@ #pragma once #include "prims/reduce_v.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" #include "prims/update_edge_src_dst_property.cuh" #include "prims/update_v_frontier.cuh" #include "prims/vertex_frontier.cuh" @@ -222,14 +222,15 @@ void core_number(raft::handle_t const& handle, if (graph_view.is_symmetric() || ((degree_type == k_core_degree_type_t::IN) || (degree_type == k_core_degree_type_t::INOUT))) { auto [new_frontier_vertex_buffer, delta_buffer] = - transform_reduce_v_frontier_outgoing_e_by_dst(handle, - graph_view, - vertex_frontier.bucket(bucket_idx_cur), - edge_src_dummy_property_t{}.view(), - dst_core_numbers.view(), - edge_dummy_property_t{}.view(), - e_op_t{k, delta}, - reduce_op::plus()); + cugraph::transform_reduce_v_frontier_outgoing_e_by_dst( + handle, + graph_view, + vertex_frontier.bucket(bucket_idx_cur), + edge_src_dummy_property_t{}.view(), + dst_core_numbers.view(), + edge_dummy_property_t{}.view(), + e_op_t{k, delta}, + reduce_op::plus()); update_v_frontier( handle, diff --git a/cpp/src/lookup/lookup_src_dst_impl.cuh b/cpp/src/lookup/lookup_src_dst_impl.cuh index 1c8c39fd6dd..45bbf870d80 100644 --- a/cpp/src/lookup/lookup_src_dst_impl.cuh +++ b/cpp/src/lookup/lookup_src_dst_impl.cuh @@ -115,12 +115,13 @@ struct 
lookup_container_t::lookup_con auto const minor_comm_size = minor_comm.get_size(); value_buffer = cugraph::collect_values_for_keys( - handle, + comm, kv_store_object->view(), edge_ids_to_lookup.begin(), edge_ids_to_lookup.end(), cugraph::detail::compute_gpu_id_from_ext_edge_id_t{ - comm_size, major_comm_size, minor_comm_size}); + comm_size, major_comm_size, minor_comm_size}, + handle.get_stream()); } else { cugraph::resize_dataframe_buffer( value_buffer, edge_ids_to_lookup.size(), handle.get_stream()); diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh index 177c79ace87..2b89d214fd7 100644 --- a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh @@ -15,9 +15,11 @@ */ #pragma once +#include "prims/detail/multi_stream_utils.cuh" #include "prims/detail/optional_dataframe_buffer.hpp" #include "prims/detail/prim_functors.cuh" #include "prims/property_op_utils.cuh" +#include "prims/vertex_frontier.cuh" #include #include @@ -72,9 +74,9 @@ __device__ void push_buffer_element(BufferKeyOutputIterator buffer_key_output_fi e_op_result_t e_op_result) { using output_key_t = - typename optional_dataframe_buffer_value_type_t::value; + typename optional_dataframe_buffer_iterator_value_type_t::value; using output_value_t = - typename optional_dataframe_buffer_value_type_t::value; + typename optional_dataframe_buffer_iterator_value_type_t::value; assert(e_op_result.has_value()); @@ -118,7 +120,6 @@ __device__ void warp_push_buffer_elements( } template buffer_idx(*buffer_idx_ptr); - int32_t constexpr shared_array_size = max_one_e_per_frontier_key - ? 
int32_t{1} /* dummy */ - : extract_transform_v_frontier_e_kernel_block_size; - __shared__ std::conditional_t - warp_local_degree_inclusive_sums[shared_array_size]; - __shared__ std::conditional_t - warp_key_local_edge_offsets[shared_array_size]; + __shared__ edge_t + warp_local_degree_inclusive_sums[extract_transform_v_frontier_e_kernel_block_size]; + __shared__ edge_t warp_key_local_edge_offsets[extract_transform_v_frontier_e_kernel_block_size]; using WarpScan = cub::WarpScan; - __shared__ std:: - conditional_t - temp_storage; + __shared__ typename WarpScan::TempStorage temp_storage; auto indices = edge_partition.indices(); @@ -216,98 +211,74 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree( } } - if constexpr (max_one_e_per_frontier_key) { - // each thread processes one frontier key, exits if any edge returns a valid output + auto min_key_idx = static_cast(idx - (idx % raft::warp_size())); // inclusive + auto max_key_idx = + static_cast(std::min(static_cast(min_key_idx) + raft::warp_size(), + static_cast(num_keys))); // exclusive - e_op_result_t e_op_result{thrust::nullopt}; - auto key = *(key_first + idx); + // update warp_local_degree_inclusive_sums & warp_key_local_edge_offsets - if (edge_partition_e_mask) { - for (edge_t i = 0; i < local_degree; ++i) { - if ((*edge_partition_e_mask).get(edge_offset + i)) { - e_op_result = call_e_op(key, edge_offset + i); - if (e_op_result) { break; } - } - } - } else { - for (edge_t i = 0; i < local_degree; ++i) { - e_op_result = call_e_op(key, edge_offset + i); - if (e_op_result) { break; } - } - } - warp_push_buffer_elements( - buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); - } else { - auto min_key_idx = static_cast(idx - (idx % raft::warp_size())); // inclusive - auto max_key_idx = - static_cast(std::min(static_cast(min_key_idx) + raft::warp_size(), - static_cast(num_keys))); // exclusive - - // update warp_local_degree_inclusive_sums & 
warp_key_local_edge_offsets - - warp_key_local_edge_offsets[threadIdx.x] = edge_offset; - WarpScan(temp_storage) - .InclusiveSum(local_degree, warp_local_degree_inclusive_sums[threadIdx.x]); - __syncwarp(); + warp_key_local_edge_offsets[threadIdx.x] = edge_offset; + WarpScan(temp_storage) + .InclusiveSum(local_degree, warp_local_degree_inclusive_sums[threadIdx.x]); + __syncwarp(); - // all the threads in a warp collectively process local edges for the keys in [key_first + - // min_key_idx, key_first + max_key_idx) + // all the threads in a warp collectively process local edges for the keys in [key_first + + // min_key_idx, key_first + max_key_idx) - auto num_edges_this_warp = warp_local_degree_inclusive_sums[warp_id * raft::warp_size() + - (max_key_idx - min_key_idx) - 1]; - auto rounded_up_num_edges_this_warp = - ((static_cast(num_edges_this_warp) + (raft::warp_size() - 1)) / raft::warp_size()) * - raft::warp_size(); + auto num_edges_this_warp = warp_local_degree_inclusive_sums[warp_id * raft::warp_size() + + (max_key_idx - min_key_idx) - 1]; + auto rounded_up_num_edges_this_warp = + ((static_cast(num_edges_this_warp) + (raft::warp_size() - 1)) / raft::warp_size()) * + raft::warp_size(); - auto this_warp_inclusive_sum_first = - warp_local_degree_inclusive_sums + warp_id * raft::warp_size(); - auto this_warp_inclusive_sum_last = - this_warp_inclusive_sum_first + (max_key_idx - min_key_idx); + auto this_warp_inclusive_sum_first = + warp_local_degree_inclusive_sums + warp_id * raft::warp_size(); + auto this_warp_inclusive_sum_last = this_warp_inclusive_sum_first + (max_key_idx - min_key_idx); - if (edge_partition_e_mask) { - for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { - e_op_result_t e_op_result{thrust::nullopt}; - - if (i < static_cast(num_edges_this_warp)) { - auto key_idx_this_warp = static_cast(thrust::distance( - this_warp_inclusive_sum_first, - thrust::upper_bound( - thrust::seq, this_warp_inclusive_sum_first, 
this_warp_inclusive_sum_last, i))); - auto local_edge_offset = - warp_key_local_edge_offsets[warp_id * raft::warp_size() + key_idx_this_warp] + - static_cast(i - ((key_idx_this_warp == 0) ? edge_t{0} - : *(this_warp_inclusive_sum_first + - (key_idx_this_warp - 1)))); - if ((*edge_partition_e_mask).get(local_edge_offset)) { - auto key = *(key_first + (min_key_idx + key_idx_this_warp)); - e_op_result = call_e_op(key, local_edge_offset); - } - } + if (edge_partition_e_mask) { + for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { + e_op_result_t e_op_result{thrust::nullopt}; - warp_push_buffer_elements( - buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); - } - } else { - for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { - e_op_result_t e_op_result{thrust::nullopt}; - - if (i < static_cast(num_edges_this_warp)) { - auto key_idx_this_warp = static_cast(thrust::distance( - this_warp_inclusive_sum_first, - thrust::upper_bound( - thrust::seq, this_warp_inclusive_sum_first, this_warp_inclusive_sum_last, i))); - auto local_edge_offset = - warp_key_local_edge_offsets[warp_id * raft::warp_size() + key_idx_this_warp] + - static_cast(i - ((key_idx_this_warp == 0) ? edge_t{0} - : *(this_warp_inclusive_sum_first + - (key_idx_this_warp - 1)))); + if (i < static_cast(num_edges_this_warp)) { + auto key_idx_this_warp = static_cast(thrust::distance( + this_warp_inclusive_sum_first, + thrust::upper_bound( + thrust::seq, this_warp_inclusive_sum_first, this_warp_inclusive_sum_last, i))); + auto local_edge_offset = + warp_key_local_edge_offsets[warp_id * raft::warp_size() + key_idx_this_warp] + + static_cast(i - ((key_idx_this_warp == 0) ? 
edge_t{0} + : *(this_warp_inclusive_sum_first + + (key_idx_this_warp - 1)))); + if ((*edge_partition_e_mask).get(local_edge_offset)) { auto key = *(key_first + (min_key_idx + key_idx_this_warp)); e_op_result = call_e_op(key, local_edge_offset); } + } - warp_push_buffer_elements( - buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + } + } else { + for (size_t i = lane_id; i < rounded_up_num_edges_this_warp; i += raft::warp_size()) { + e_op_result_t e_op_result{thrust::nullopt}; + + if (i < static_cast(num_edges_this_warp)) { + auto key_idx_this_warp = static_cast(thrust::distance( + this_warp_inclusive_sum_first, + thrust::upper_bound( + thrust::seq, this_warp_inclusive_sum_first, this_warp_inclusive_sum_last, i))); + auto local_edge_offset = + warp_key_local_edge_offsets[warp_id * raft::warp_size() + key_idx_this_warp] + + static_cast(i - ((key_idx_this_warp == 0) ? edge_t{0} + : *(this_warp_inclusive_sum_first + + (key_idx_this_warp - 1)))); + auto key = *(key_first + (min_key_idx + key_idx_this_warp)); + e_op_result = call_e_op(key, local_edge_offset); } + + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); } } @@ -315,8 +286,7 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree( } } -template buffer_idx(*buffer_idx_ptr); - using WarpReduce = cub::WarpReduce; - __shared__ std::conditional_t - temp_storage[max_one_e_per_frontier_key - ? 
(extract_transform_v_frontier_e_kernel_block_size / raft::warp_size()) - : int32_t{1} /* dummy */]; - while (idx < static_cast(thrust::distance(key_first, key_last))) { auto key = *(key_first + idx); auto major = thrust_tuple_get_or_identity(key); auto major_offset = edge_partition.major_offset_from_major_nocheck(major); vertex_t const* indices{nullptr}; edge_t local_edge_offset{}; - edge_t local_out_degree{}; - thrust::tie(indices, local_edge_offset, local_out_degree) = + edge_t local_degree{}; + thrust::tie(indices, local_edge_offset, local_degree) = edge_partition.local_edges(major_offset); - auto rounded_up_local_out_degree = - ((static_cast(local_out_degree) + (raft::warp_size() - 1)) / raft::warp_size()) * + auto rounded_up_local_degree = + ((static_cast(local_degree) + (raft::warp_size() - 1)) / raft::warp_size()) * raft::warp_size(); auto call_e_op = call_e_op_t(local_out_degree)) && + if ((i < static_cast(local_degree)) && ((*edge_partition_e_mask).get(local_edge_offset + i))) { e_op_result = call_e_op(i); } - if constexpr (max_one_e_per_frontier_key) { - auto first_valid_lane_id = - WarpReduce(temp_storage[threadIdx.x / raft::warp_size()]) - .Reduce(e_op_result ? 
lane_id : raft::warp_size(), cub::Min()); - first_valid_lane_id = __shfl_sync(raft::warp_full_mask(), first_valid_lane_id, int{0}); - if (lane_id == first_valid_lane_id) { - auto push_idx = buffer_idx.fetch_add(1, cuda::std::memory_order_relaxed); - push_buffer_element( - buffer_key_output_first, buffer_value_output_first, push_idx, e_op_result); - } - if (first_valid_lane_id != raft::warp_size()) { break; } - } else { - warp_push_buffer_elements( - buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); - } + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); } } else { - for (size_t i = lane_id; i < rounded_up_local_out_degree; i += raft::warp_size()) { + for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) { e_op_result_t e_op_result{thrust::nullopt}; - if (i < static_cast(local_out_degree)) { e_op_result = call_e_op(i); } - - if constexpr (max_one_e_per_frontier_key) { - auto first_valid_lane_id = - WarpReduce(temp_storage[threadIdx.x / raft::warp_size()]) - .Reduce(e_op_result ? 
lane_id : raft::warp_size(), cub::Min()); - first_valid_lane_id = __shfl_sync(raft::warp_full_mask(), first_valid_lane_id, int{0}); - if (lane_id == first_valid_lane_id) { - auto push_buffer_idx = buffer_idx.fetch_add(1, cuda::std::memory_order_relaxed); - push_buffer_element( - buffer_key_output_first, buffer_value_output_first, push_buffer_idx, e_op_result); - } - if (first_valid_lane_id != raft::warp_size()) { break; } - } else { - warp_push_buffer_elements( - buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); - } + if (i < static_cast(local_degree)) { e_op_result = call_e_op(i); } + + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); } } @@ -446,8 +382,7 @@ __global__ static void extract_transform_v_frontier_e_mid_degree( } } -template edge_partition, KeyIterator key_first, - KeyIterator key_last, + raft::device_span key_local_degree_offsets, EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, @@ -482,123 +417,234 @@ __global__ static void extract_transform_v_frontier_e_high_degree( typename EdgePartitionEdgeValueInputWrapper::value_type, EdgeOp>::type; - auto const warp_id = threadIdx.x / raft::warp_size(); + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; auto const lane_id = threadIdx.x % raft::warp_size(); - auto idx = static_cast(blockIdx.x); - - cuda::atomic_ref buffer_idx(*buffer_idx_ptr); - - using BlockReduce = cub::BlockReduce; - __shared__ std::conditional_t - temp_storage; - __shared__ int32_t output_thread_id; - - while (idx < static_cast(thrust::distance(key_first, key_last))) { - auto key = *(key_first + idx); - auto major = thrust_tuple_get_or_identity(key); - auto major_offset = edge_partition.major_offset_from_major_nocheck(major); - vertex_t const* indices{nullptr}; - edge_t 
local_edge_offset{}; - edge_t local_out_degree{}; - thrust::tie(indices, local_edge_offset, local_out_degree) = - edge_partition.local_edges(major_offset); - auto rounded_up_local_out_degree = ((static_cast(local_out_degree) + - (extract_transform_v_frontier_e_kernel_block_size - 1)) / - extract_transform_v_frontier_e_kernel_block_size) * - extract_transform_v_frontier_e_kernel_block_size; - auto call_e_op = call_e_op_t{edge_partition, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - e_op, - key, - major_offset, - indices, - local_edge_offset}; + auto idx = static_cast(tid); - if (edge_partition_e_mask) { - for (size_t i = threadIdx.x; i < rounded_up_local_out_degree; i += blockDim.x) { - e_op_result_t e_op_result{thrust::nullopt}; - if ((i < static_cast(local_out_degree)) && - ((*edge_partition_e_mask).get(local_edge_offset + i))) { - e_op_result = call_e_op(i); - } + cuda::atomic_ref buffer_idx(*buffer_idx_ptr); - if constexpr (max_one_e_per_frontier_key) { - auto first_valid_thread_id = - BlockReduce(temp_storage) - .Reduce(e_op_result ? 
threadIdx.x : extract_transform_v_frontier_e_kernel_block_size, - cub::Min()); - if (threadIdx.x == 0) { output_thread_id = first_valid_thread_id; } - __syncthreads(); - if (threadIdx.x == output_thread_id) { - auto push_buffer_idx = buffer_idx.fetch_add(1, cuda::std::memory_order_relaxed); - push_buffer_element( - buffer_key_output_first, buffer_value_output_first, push_buffer_idx, e_op_result); - } - if (output_thread_id != extract_transform_v_frontier_e_kernel_block_size) { break; } - } else { - warp_push_buffer_elements( - buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); - } - } - } else { - for (size_t i = threadIdx.x; i < rounded_up_local_out_degree; i += blockDim.x) { - e_op_result_t e_op_result{thrust::nullopt}; - if (i < static_cast(local_out_degree)) { e_op_result = call_e_op(i); } - - if constexpr (max_one_e_per_frontier_key) { - auto first_valid_thread_id = - BlockReduce(temp_storage) - .Reduce(e_op_result ? threadIdx.x : extract_transform_v_frontier_e_kernel_block_size, - cub::Min()); - if (threadIdx.x == 0) { output_thread_id = first_valid_thread_id; } - __syncthreads(); - if (threadIdx.x == output_thread_id) { - auto push_buffer_idx = buffer_idx.fetch_add(1, cuda::std::memory_order_relaxed); - push_buffer_element( - buffer_key_output_first, buffer_value_output_first, push_buffer_idx, e_op_result); - } - if (output_thread_id != extract_transform_v_frontier_e_kernel_block_size) { break; } - } else { - warp_push_buffer_elements( - buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); + auto num_edges = *(key_local_degree_offsets.rbegin()); + size_t rounded_up_num_edges = + ((static_cast(num_edges) + (raft::warp_size() - 1)) / raft::warp_size()) * + raft::warp_size(); + while (idx < rounded_up_num_edges) { + e_op_result_t e_op_result{thrust::nullopt}; + if (idx < num_edges) { + auto key_idx = thrust::distance( + key_local_degree_offsets.begin() + 1, + thrust::upper_bound( + 
thrust::seq, key_local_degree_offsets.begin() + 1, key_local_degree_offsets.end(), idx)); + auto key = *(key_first + key_idx); + auto major = thrust_tuple_get_or_identity(key); + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + vertex_t const* indices{nullptr}; + edge_t local_edge_offset{}; + edge_t local_degree{}; + thrust::tie(indices, local_edge_offset, local_degree) = + edge_partition.local_edges(major_offset); + + auto call_e_op = call_e_op_t{edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + e_op, + key, + major_offset, + indices, + local_edge_offset}; + + auto e_idx = static_cast(idx - key_local_degree_offsets[key_idx]); + if (edge_partition_e_mask) { + if ((*edge_partition_e_mask).get(local_edge_offset + e_idx)) { + e_op_result = call_e_op(e_idx); } + } else { + e_op_result = call_e_op(e_idx); } } + warp_push_buffer_elements( + buffer_key_output_first, buffer_value_output_first, buffer_idx, lane_id, e_op_result); - idx += gridDim.x; + idx += gridDim.x * blockDim.x; + } +} + +template +void extract_transform_v_frontier_e_edge_partition( + raft::handle_t const& handle, + edge_partition_device_view_t edge_partition, + InputKeyIterator edge_partition_frontier_key_first, + InputKeyIterator edge_partition_frontier_key_last, + EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, + EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, + EdgePartitionValueInputWrapper edge_partition_e_value_input, + thrust::optional edge_partition_e_mask, + OptionalOutputKeyIterator output_key_first, + OptionalOutputValueIterator output_value_first, + raft::device_span count /* size = 1 */, + EdgeOp e_op, + std::optional> high_segment_key_local_degree_offsets, + std::optional high_segment_edge_count, + std::optional> key_segment_offsets, + std::optional> const& edge_partition_stream_pool_indices) +{ + size_t stream_pool_size{0}; + if 
(edge_partition_stream_pool_indices) { + stream_pool_size = (*edge_partition_stream_pool_indices).size(); + } + if (key_segment_offsets) { + if (((*key_segment_offsets)[1] > 0) && ((*high_segment_edge_count) > 0)) { + auto exec_stream = edge_partition_stream_pool_indices + ? handle.get_stream_from_stream_pool( + (*edge_partition_stream_pool_indices)[0 % stream_pool_size]) + : handle.get_stream(); + + raft::grid_1d_thread_t update_grid((*high_segment_edge_count), + extract_transform_v_frontier_e_kernel_block_size, + handle.get_device_properties().maxGridSize[0]); + extract_transform_v_frontier_e_high_degree + <<>>( + edge_partition, + edge_partition_frontier_key_first, + raft::device_span((*high_segment_key_local_degree_offsets).data(), + (*high_segment_key_local_degree_offsets).size()), + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + output_key_first, + output_value_first, + count.data(), + e_op); + } + if ((*key_segment_offsets)[2] - (*key_segment_offsets)[1] > 0) { + auto exec_stream = edge_partition_stream_pool_indices + ? handle.get_stream_from_stream_pool( + (*edge_partition_stream_pool_indices)[1 % stream_pool_size]) + : handle.get_stream(); + raft::grid_1d_warp_t update_grid((*key_segment_offsets)[2] - (*key_segment_offsets)[1], + extract_transform_v_frontier_e_kernel_block_size, + handle.get_device_properties().maxGridSize[0]); + extract_transform_v_frontier_e_mid_degree + <<>>( + edge_partition, + edge_partition_frontier_key_first + (*key_segment_offsets)[1], + edge_partition_frontier_key_first + (*key_segment_offsets)[2], + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + output_key_first, + output_value_first, + count.data(), + e_op); + } + if ((*key_segment_offsets)[3] - (*key_segment_offsets)[2] > 0) { + auto exec_stream = edge_partition_stream_pool_indices + ? 
handle.get_stream_from_stream_pool( + (*edge_partition_stream_pool_indices)[2 % stream_pool_size]) + : handle.get_stream(); + raft::grid_1d_thread_t update_grid((*key_segment_offsets)[3] - (*key_segment_offsets)[2], + extract_transform_v_frontier_e_kernel_block_size, + handle.get_device_properties().maxGridSize[0]); + extract_transform_v_frontier_e_hypersparse_or_low_degree + <<>>( + edge_partition, + edge_partition_frontier_key_first + (*key_segment_offsets)[2], + edge_partition_frontier_key_first + (*key_segment_offsets)[3], + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + output_key_first, + output_value_first, + count.data(), + e_op); + } + if (edge_partition.dcs_nzd_vertex_count() && + ((*key_segment_offsets)[4] - (*key_segment_offsets)[3] > 0)) { + auto exec_stream = edge_partition_stream_pool_indices + ? handle.get_stream_from_stream_pool( + (*edge_partition_stream_pool_indices)[3 % stream_pool_size]) + : handle.get_stream(); + raft::grid_1d_thread_t update_grid((*key_segment_offsets)[4] - (*key_segment_offsets)[3], + extract_transform_v_frontier_e_kernel_block_size, + handle.get_device_properties().maxGridSize[0]); + extract_transform_v_frontier_e_hypersparse_or_low_degree + <<>>( + edge_partition, + edge_partition_frontier_key_first + (*key_segment_offsets)[3], + edge_partition_frontier_key_first + (*key_segment_offsets)[4], + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + output_key_first, + output_value_first, + count.data(), + e_op); + } + } else { + auto exec_stream = edge_partition_stream_pool_indices + ? 
handle.get_stream_from_stream_pool( + (*edge_partition_stream_pool_indices)[0 % stream_pool_size]) + : handle.get_stream(); + + auto frontier_size = static_cast( + thrust::distance(edge_partition_frontier_key_first, edge_partition_frontier_key_last)); + if (frontier_size > 0) { + raft::grid_1d_thread_t update_grid(frontier_size, + extract_transform_v_frontier_e_kernel_block_size, + handle.get_device_properties().maxGridSize[0]); + + extract_transform_v_frontier_e_hypersparse_or_low_degree + <<>>( + edge_partition, + edge_partition_frontier_key_first, + edge_partition_frontier_key_last, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + output_key_first, + output_value_first, + count.data(), + e_op); + } } } template -std::tuple< - decltype(allocate_optional_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{})), - decltype(allocate_optional_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{}))> +std::tuple, + optional_dataframe_buffer_type_t> extract_transform_v_frontier_e(raft::handle_t const& handle, GraphViewType const& graph_view, - VertexFrontierBucketType const& frontier, + KeyBucketType const& frontier, EdgeSrcValueInputWrapper edge_src_value_input, EdgeDstValueInputWrapper edge_dst_value_input, EdgeValueInputWrapper edge_value_input, @@ -607,7 +653,7 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, { using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; - using key_t = typename VertexFrontierBucketType::key_type; + using key_t = typename KeyBucketType::key_type; using output_key_t = OutputKeyT; using output_value_t = OutputValueT; @@ -653,6 +699,9 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, thrust::optional, thrust::optional>>>); + constexpr bool try_bitmap = GraphViewType::is_multi_gpu && std::is_same_v && + KeyBucketType::is_sorted_unique; + if (do_expensive_check) { auto frontier_vertex_first = 
thrust_tuple_get_or_identity(frontier.begin()); @@ -673,10 +722,15 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, "Invalid input argument: frontier includes out-of-range keys."); } + [[maybe_unused]] constexpr auto max_segments = + detail::num_sparse_segments_per_vertex_partition + size_t{1}; + + // 1. pre-process frontier data + auto frontier_key_first = frontier.begin(); auto frontier_key_last = frontier.end(); auto frontier_keys = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); - if constexpr (!VertexFrontierBucketType::is_sorted_unique) { + if constexpr (!KeyBucketType::is_sorted_unique) { resize_dataframe_buffer(frontier_keys, frontier.size(), handle.get_stream()); thrust::copy(handle.get_thrust_policy(), frontier_key_first, @@ -689,209 +743,708 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, frontier_key_last = get_dataframe_buffer_end(frontier_keys); } - // 1. fill the buffers + std::optional> key_segment_offsets{std::nullopt}; + { // drop zero degree vertices & compute key_segment_offsets + size_t partition_idx{0}; + if constexpr (GraphViewType::is_multi_gpu) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + partition_idx = static_cast(minor_comm.get_rank()); + } + auto segment_offsets = graph_view.local_edge_partition_segment_offsets(partition_idx); + if (segment_offsets) { + if (thrust::distance(frontier_key_first, frontier_key_last) > 0) { + key_segment_offsets = compute_key_segment_offsets( + frontier_key_first, + frontier_key_last, + raft::host_span((*segment_offsets).data(), (*segment_offsets).size()), + graph_view.local_vertex_partition_range_first(), + handle.get_stream()); + (*key_segment_offsets).back() = *((*key_segment_offsets).rbegin() + 1); + frontier_key_last = frontier_key_first + (*key_segment_offsets).back(); + } else { + key_segment_offsets = std::vector((*segment_offsets).size(), 0); + } + } + } + + // 2. 
compute local max_pushes - auto key_buffer = - allocate_optional_dataframe_buffer(size_t{0}, handle.get_stream()); - auto value_buffer = - allocate_optional_dataframe_buffer(size_t{0}, handle.get_stream()); - rmm::device_scalar buffer_idx(size_t{0}, handle.get_stream()); + size_t local_max_pushes{}; + { + size_t partition_idx{}; + if constexpr (GraphViewType::is_multi_gpu) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + partition_idx = static_cast(minor_comm_rank); + } + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(partition_idx)); + auto frontier_major_first = + thrust_tuple_get_or_identity(frontier_key_first); + auto frontier_major_last = + thrust_tuple_get_or_identity(frontier_key_last); + // for an edge-masked graph, we can pass edge mask to compute tighter bound (at the expense of + // additional computing) + local_max_pushes = edge_partition.compute_number_of_edges( + frontier_major_first, frontier_major_last, handle.get_stream()); + } + + // 3. 
communication over minor_comm std::vector local_frontier_sizes{}; + std::conditional_t, std::byte /* dummy */> + max_tmp_buffer_sizes{}; + std::conditional_t, std::byte /* dummy */> + tmp_buffer_size_per_loop_approximations{}; + std::conditional_t, std::byte /* dummy */> + local_frontier_range_firsts{}; + std::conditional_t, std::byte /* dummy */> + local_frontier_range_lasts{}; + std::optional>> key_segment_offset_vectors{std::nullopt}; if constexpr (GraphViewType::is_multi_gpu) { - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - local_frontier_sizes = host_scalar_allgather( - minor_comm, - static_cast(thrust::distance(frontier_key_first, frontier_key_last)), - handle.get_stream()); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + auto const minor_comm_size = minor_comm.get_size(); + + auto max_tmp_buffer_size = + static_cast(static_cast(handle.get_device_properties().totalGlobalMem) * 0.2); + size_t approx_tmp_buffer_size_per_loop{}; + { + size_t key_size{0}; + if constexpr (std::is_arithmetic_v) { + key_size = sizeof(key_t); + } else { + key_size = cugraph::sum_thrust_tuple_element_sizes(); + } + size_t output_key_size{0}; + if constexpr (!std::is_same_v) { + if constexpr (std::is_arithmetic_v) { + output_key_size = sizeof(output_key_t); + } else { + output_key_size = cugraph::sum_thrust_tuple_element_sizes(); + } + } + size_t output_value_size{0}; + if constexpr (!std::is_same_v) { + if constexpr (std::is_arithmetic_v) { + output_value_size = sizeof(output_value_t); + } else { + output_value_size = cugraph::sum_thrust_tuple_element_sizes(); + } + } + approx_tmp_buffer_size_per_loop = + static_cast(thrust::distance(frontier_key_first, frontier_key_last)) * key_size + + local_max_pushes * (output_key_size + output_value_size); + } + + size_t num_scalars = + 3; // local_frontier_size, max_tmp_buffer_size, 
approx_tmp_buffer_size_per_loop + if constexpr (try_bitmap) { + num_scalars += 2; // local_frontier_range_first, local_frontier_range_last + } + if (key_segment_offsets) { num_scalars += (*key_segment_offsets).size(); } + rmm::device_uvector d_aggregate_tmps(minor_comm_size * num_scalars, + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + d_aggregate_tmps.begin() + num_scalars * minor_comm_rank, + d_aggregate_tmps.begin() + (num_scalars * minor_comm_rank + (try_bitmap ? 5 : 3)), + [frontier_key_first, + max_tmp_buffer_size, + approx_tmp_buffer_size_per_loop, + v_list_size = static_cast(thrust::distance(frontier_key_first, frontier_key_last)), + vertex_partition_range_first = + graph_view.local_vertex_partition_range_first()] __device__(size_t i) { + if (i == 0) { + return v_list_size; + } else if (i == 1) { + return max_tmp_buffer_size; + } else if (i == 2) { + return approx_tmp_buffer_size_per_loop; + } + if constexpr (try_bitmap) { + if (i == 3) { + vertex_t first{}; + if (v_list_size > 0) { + first = *frontier_key_first; + } else { + first = vertex_partition_range_first; + } + assert(static_cast(static_cast(first)) == first); + return static_cast(first); + } else if (i == 4) { + assert(i == 4); + vertex_t last{}; + if (v_list_size > 0) { + last = *(frontier_key_first + (v_list_size - 1)) + 1; + } else { + last = vertex_partition_range_first; + } + assert(static_cast(static_cast(last)) == last); + return static_cast(last); + } + } + assert(false); + return size_t{0}; + }); + if (key_segment_offsets) { + raft::update_device( + d_aggregate_tmps.data() + (minor_comm_rank * num_scalars + (try_bitmap ? 
5 : 3)), + (*key_segment_offsets).data(), + (*key_segment_offsets).size(), + handle.get_stream()); + } + + if (minor_comm_size > 1) { + device_allgather(minor_comm, + d_aggregate_tmps.data() + minor_comm_rank * num_scalars, + d_aggregate_tmps.data(), + num_scalars, + handle.get_stream()); + } + + std::vector h_aggregate_tmps(d_aggregate_tmps.size()); + raft::update_host(h_aggregate_tmps.data(), + d_aggregate_tmps.data(), + d_aggregate_tmps.size(), + handle.get_stream()); + handle.sync_stream(); + local_frontier_sizes = std::vector(minor_comm_size); + max_tmp_buffer_sizes = std::vector(minor_comm_size); + tmp_buffer_size_per_loop_approximations = std::vector(minor_comm_size); + if constexpr (try_bitmap) { + local_frontier_range_firsts = std::vector(minor_comm_size); + local_frontier_range_lasts = std::vector(minor_comm_size); + } + if (key_segment_offsets) { + key_segment_offset_vectors = std::vector>{}; + (*key_segment_offset_vectors).reserve(minor_comm_size); + } + for (int i = 0; i < minor_comm_size; ++i) { + local_frontier_sizes[i] = h_aggregate_tmps[i * num_scalars]; + max_tmp_buffer_sizes[i] = h_aggregate_tmps[i * num_scalars + 1]; + tmp_buffer_size_per_loop_approximations[i] = h_aggregate_tmps[i * num_scalars + 2]; + if constexpr (try_bitmap) { + local_frontier_range_firsts[i] = + static_cast(h_aggregate_tmps[i * num_scalars + 3]); + local_frontier_range_lasts[i] = + static_cast(h_aggregate_tmps[i * num_scalars + 4]); + } + if (key_segment_offsets) { + (*key_segment_offset_vectors) + .emplace_back(h_aggregate_tmps.begin() + (i * num_scalars + (try_bitmap ? 5 : 3)), + h_aggregate_tmps.begin() + + (i * num_scalars + (try_bitmap ? 
5 : 3) + (*key_segment_offsets).size())); + } + } } else { local_frontier_sizes = std::vector{static_cast( static_cast(thrust::distance(frontier_key_first, frontier_key_last)))}; + if (key_segment_offsets) { + key_segment_offset_vectors = std::vector>(1); + (*key_segment_offset_vectors)[0] = *key_segment_offsets; + } + } + + // update frontier bitmap (used to reduce broadcast bandwidth size) + + bool v_compressible{false}; + std:: + conditional_t>, std::byte /* dummy */> + frontier_bitmap{}; + std:: + conditional_t>, std::byte /* dummy */> + compressed_frontier{}; + if constexpr (try_bitmap) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + if (minor_comm_size > 1) { + auto const minor_comm_rank = minor_comm.get_rank(); + + if constexpr (sizeof(vertex_t) == 8) { + vertex_t local_frontier_max_range_size{0}; + for (int i = 0; i < minor_comm_size; ++i) { + auto range_size = local_frontier_range_lasts[i] - local_frontier_range_firsts[i]; + local_frontier_max_range_size = std::max(range_size, local_frontier_max_range_size); + } + if (local_frontier_max_range_size <= + std::numeric_limits::max()) { // broadcast 32 bit offset values instead of 64 + // bit vertex IDs + v_compressible = true; + } + } + + double avg_fill_ratio{0.0}; + for (int i = 0; i < minor_comm_size; ++i) { + auto num_keys = static_cast(local_frontier_sizes[i]); + auto range_size = local_frontier_range_lasts[i] - local_frontier_range_firsts[i]; + avg_fill_ratio += + (range_size > 0) ? 
(num_keys / static_cast(range_size)) : double{0.0}; + } + avg_fill_ratio /= static_cast(minor_comm_size); + constexpr double threshold_ratio = + 8.0 /* tuning parameter */ / static_cast(sizeof(vertex_t) * 8); + auto avg_frontier_size = + std::reduce(local_frontier_sizes.begin(), local_frontier_sizes.end()) / + static_cast(minor_comm_size); + + if ((avg_fill_ratio > threshold_ratio) && + (static_cast(avg_frontier_size) > + packed_bools_per_word() * + 32 /* tuning parameter, to consider additional kernel launch overhead */)) { + frontier_bitmap = + compute_vertex_list_bitmap_info(frontier_key_first, + frontier_key_last, + local_frontier_range_firsts[minor_comm_rank], + local_frontier_range_lasts[minor_comm_rank], + handle.get_stream()); + } else if (v_compressible) { + rmm::device_uvector tmps(local_frontier_sizes[minor_comm_rank], + handle.get_stream()); + thrust::transform(handle.get_thrust_policy(), + frontier_key_first, + frontier_key_last, + tmps.begin(), + cuda::proclaim_return_type( + [range_first = local_frontier_range_firsts[minor_comm_rank]] __device__( + auto v) { return static_cast(v - range_first); })); + compressed_frontier = std::move(tmps); + } + } + } + + // set-up stream pool + + std::optional> stream_pool_indices{std::nullopt}; + if constexpr (GraphViewType::is_multi_gpu) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + auto max_tmp_buffer_size = + std::reduce(max_tmp_buffer_sizes.begin(), max_tmp_buffer_sizes.end()) / + static_cast(minor_comm_size); + auto approx_tmp_buffer_size_per_loop = + std::reduce(tmp_buffer_size_per_loop_approximations.begin(), + tmp_buffer_size_per_loop_approximations.end()) / + static_cast(minor_comm_size); + size_t num_streams_per_loop{1}; + if (graph_view.local_vertex_partition_segment_offsets() && + (handle.get_stream_pool_size() >= max_segments)) { + num_streams_per_loop = std::max( + std::min(size_t{8} /
graph_view.number_of_local_edge_partitions(), max_segments), + size_t{ + 1}); // Note that "CUDA_DEVICE_MAX_CONNECTIONS (default: 8, can be set to [1, 32])" sets + // the number of queues, if the total number of streams exceeds this number, jobs on + // different streams can be sent to one queue leading to false dependency. Setting + // num_concurrent_loops above the number of queues has some benefits in NCCL + // communications but creating too many streams just for compute may not help. + } + stream_pool_indices = init_stream_pool_indices(max_tmp_buffer_size, + approx_tmp_buffer_size_per_loop, + graph_view.number_of_local_edge_partitions(), + num_streams_per_loop, + handle.get_stream_pool_size()); + if ((*stream_pool_indices).size() <= 1) { stream_pool_indices = std::nullopt; } + } + + size_t num_concurrent_loops{1}; + std::optional> loop_stream_pool_indices{ + std::nullopt}; // first num_concurrent_loops streams from stream_pool_indices + if (stream_pool_indices) { + num_concurrent_loops = + std::min(graph_view.number_of_local_edge_partitions(), (*stream_pool_indices).size()); + loop_stream_pool_indices = std::vector(num_concurrent_loops); + std::iota((*loop_stream_pool_indices).begin(), (*loop_stream_pool_indices).end(), size_t{0}); } + rmm::device_uvector counters(num_concurrent_loops, handle.get_stream()); + + if constexpr (!GraphViewType::is_multi_gpu) { + if (loop_stream_pool_indices) { handle.sync_stream(); } + } + + // 2. fill the buffers + + std::vector> key_buffers{}; + std::vector> value_buffers{}; + key_buffers.reserve(graph_view.number_of_local_edge_partitions()); + value_buffers.reserve(graph_view.number_of_local_edge_partitions()); + auto edge_mask_view = graph_view.edge_mask_view(); - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { - auto edge_partition = - edge_partition_device_view_t( - graph_view.local_edge_partition_view(i)); - auto edge_partition_e_mask = - edge_mask_view - ?
thrust::make_optional< - detail::edge_partition_edge_property_device_view_t>( - *edge_mask_view, i) - : thrust::nullopt; - - auto edge_partition_frontier_key_buffer = - allocate_dataframe_buffer(size_t{0}, handle.get_stream()); - vertex_t edge_partition_frontier_size = static_cast(local_frontier_sizes[i]); - auto edge_partition_frontier_key_first = frontier_key_first; - auto edge_partition_frontier_key_last = frontier_key_last; + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); i += num_concurrent_loops) { + auto loop_count = + std::min(num_concurrent_loops, graph_view.number_of_local_edge_partitions() - i); + + std::conditional_t< + GraphViewType::is_multi_gpu, + std::conditional_t< + try_bitmap, + std::vector, rmm::device_uvector>>, + std::vector>>, + std::byte /* dummy */> + edge_partition_key_buffers{}; if constexpr (GraphViewType::is_multi_gpu) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_rank = minor_comm.get_rank(); + auto const minor_comm_size = minor_comm.get_size(); + + edge_partition_key_buffers.reserve(loop_count); + + std::conditional_t>>, + std::byte /* dummy */> + edge_partition_bitmap_buffers{}; + if constexpr (try_bitmap) { + if (frontier_bitmap) { + edge_partition_bitmap_buffers = std::vector>{}; + (*edge_partition_bitmap_buffers).reserve(loop_count); + } + } - resize_dataframe_buffer( - edge_partition_frontier_key_buffer, edge_partition_frontier_size, handle.get_stream()); - - device_bcast(minor_comm, - frontier_key_first, - get_dataframe_buffer_begin(edge_partition_frontier_key_buffer), - edge_partition_frontier_size, - static_cast(i), - handle.get_stream()); - - edge_partition_frontier_key_first = - get_dataframe_buffer_begin(edge_partition_frontier_key_buffer); - edge_partition_frontier_key_last = - get_dataframe_buffer_end(edge_partition_frontier_key_buffer); - } - - auto edge_partition_frontier_major_first = - thrust_tuple_get_or_identity( - 
edge_partition_frontier_key_first); - auto edge_partition_frontier_major_last = - thrust_tuple_get_or_identity( - edge_partition_frontier_key_last); - - auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); - auto max_pushes = max_one_e_per_frontier_key ? edge_partition_frontier_size - : edge_partition.compute_number_of_edges( - edge_partition_frontier_major_first, - edge_partition_frontier_major_last, - handle.get_stream()); - - auto new_buffer_size = buffer_idx.value(handle.get_stream()) + max_pushes; - resize_optional_dataframe_buffer( - key_buffer, new_buffer_size, handle.get_stream()); - resize_optional_dataframe_buffer( - value_buffer, new_buffer_size, handle.get_stream()); - - edge_partition_src_input_device_view_t edge_partition_src_value_input{}; - edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; - if constexpr (GraphViewType::is_storage_transposed) { - edge_partition_src_value_input = edge_partition_src_input_device_view_t(edge_src_value_input); - edge_partition_dst_value_input = - edge_partition_dst_input_device_view_t(edge_dst_value_input, i); - } else { - edge_partition_src_value_input = - edge_partition_src_input_device_view_t(edge_src_value_input, i); - edge_partition_dst_value_input = edge_partition_dst_input_device_view_t(edge_dst_value_input); + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + + bool use_bitmap_buffer = false; + if constexpr (try_bitmap) { + if (edge_partition_bitmap_buffers) { + (*edge_partition_bitmap_buffers) + .emplace_back(packed_bool_size(local_frontier_range_lasts[partition_idx] - + local_frontier_range_firsts[partition_idx]), + handle.get_stream()); + use_bitmap_buffer = true; + } + } + if (!use_bitmap_buffer) { + bool allocated{false}; + if constexpr (try_bitmap) { + if (v_compressible) { + edge_partition_key_buffers.push_back(rmm::device_uvector( + local_frontier_sizes[partition_idx], handle.get_stream())); + allocated = true; + } + } + if (!allocated) 
{ + edge_partition_key_buffers.push_back(allocate_dataframe_buffer( + local_frontier_sizes[partition_idx], handle.get_stream())); + } + } + } + + device_group_start(minor_comm); + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + + if constexpr (try_bitmap) { + if (frontier_bitmap) { + device_bcast(minor_comm, + (*frontier_bitmap).data(), + get_dataframe_buffer_begin((*edge_partition_bitmap_buffers)[j]), + size_dataframe_buffer((*edge_partition_bitmap_buffers)[j]), + static_cast(partition_idx), + handle.get_stream()); + } else if (compressed_frontier) { + device_bcast(minor_comm, + (*compressed_frontier).data(), + get_dataframe_buffer_begin(std::get<0>(edge_partition_key_buffers[j])), + local_frontier_sizes[partition_idx], + static_cast(partition_idx), + handle.get_stream()); + } else { + device_bcast(minor_comm, + frontier_key_first, + get_dataframe_buffer_begin(std::get<1>(edge_partition_key_buffers[j])), + local_frontier_sizes[partition_idx], + static_cast(partition_idx), + handle.get_stream()); + } + } else { + device_bcast(minor_comm, + frontier_key_first, + get_dataframe_buffer_begin(edge_partition_key_buffers[j]), + local_frontier_sizes[partition_idx], + static_cast(partition_idx), + handle.get_stream()); + } + } + device_group_end(minor_comm); + if (loop_stream_pool_indices) { handle.sync_stream(); } + + if constexpr (try_bitmap) { + if (edge_partition_bitmap_buffers) { + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = + loop_stream_pool_indices + ? 
handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + std::variant, rmm::device_uvector> keys = + rmm::device_uvector(0, loop_stream); + if (v_compressible) { + std::get<0>(keys).resize(local_frontier_sizes[partition_idx], loop_stream); + } else { + keys = + rmm::device_uvector(local_frontier_sizes[partition_idx], loop_stream); + } + + auto& rx_bitmap = (*edge_partition_bitmap_buffers)[j]; + + auto range_first = local_frontier_range_firsts[partition_idx]; + auto range_last = local_frontier_range_lasts[partition_idx]; + if (keys.index() == 0) { + retrieve_vertex_list_from_bitmap( + raft::device_span(rx_bitmap.data(), rx_bitmap.size()), + get_dataframe_buffer_begin(std::get<0>(keys)), + raft::device_span(counters.data() + j, + size_t{1}), // dummy, we already know the counts + uint32_t{0}, + static_cast(range_last - range_first), + loop_stream); + } else { + retrieve_vertex_list_from_bitmap( + raft::device_span(rx_bitmap.data(), rx_bitmap.size()), + get_dataframe_buffer_begin(std::get<1>(keys)), + raft::device_span(counters.data() + j, + size_t{1}), // dummy, we already know the counts + range_first, + range_last, + loop_stream); + } + + edge_partition_key_buffers.push_back(std::move(keys)); + } + if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } + (*edge_partition_bitmap_buffers).clear(); + } + } } - auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, i); - if (segment_offsets) { - static_assert(num_sparse_segments_per_vertex_partition == 3); - std::vector h_thresholds(num_sparse_segments_per_vertex_partition + - (graph_view.use_dcs() ? 
1 : 0) - 1); - h_thresholds[0] = edge_partition.major_range_first() + (*segment_offsets)[1]; - h_thresholds[1] = edge_partition.major_range_first() + (*segment_offsets)[2]; - if (graph_view.use_dcs()) { - h_thresholds[2] = edge_partition.major_range_first() + (*segment_offsets)[3]; + std::vector> output_key_buffers{}; + output_key_buffers.reserve(loop_count); + std::vector> output_value_buffers{}; + output_value_buffers.reserve(loop_count); + std::vector edge_partition_max_push_counts(loop_count); + + std::optional>> + high_segment_key_local_degree_offset_vectors{std::nullopt}; + std::optional> high_segment_edge_counts{std::nullopt}; + if (key_segment_offset_vectors) { + high_segment_key_local_degree_offset_vectors = std::vector>{}; + (*high_segment_key_local_degree_offset_vectors).reserve(loop_count); + high_segment_edge_counts = std::vector(loop_count); + } + + edge_partition_max_push_counts[0] = local_max_pushes; + if constexpr (GraphViewType::is_multi_gpu) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + auto const minor_comm_size = minor_comm.get_size(); + if (minor_comm_size > 1) { + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = loop_stream_pool_indices + ? 
handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + if (static_cast(partition_idx) != minor_comm_rank) { + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(partition_idx)); + + auto const& keys = edge_partition_key_buffers[j]; + + bool computed{false}; + if constexpr (try_bitmap) { + if (keys.index() == 0) { + auto major_first = thrust::make_transform_iterator( + std::get<0>(keys).begin(), + cuda::proclaim_return_type( + [range_first = + local_frontier_range_firsts[partition_idx]] __device__(uint32_t v_offset) { + return range_first + static_cast(v_offset); + })); + edge_partition.compute_number_of_edges_async( + major_first, + major_first + std::get<0>(keys).size(), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + computed = true; + } + } + if (!computed) { + dataframe_buffer_const_iterator_type_t key_first{}; + size_t num_keys{}; + if constexpr (try_bitmap) { + assert(keys.index() == 1); + key_first = get_dataframe_buffer_begin(std::get<1>(keys)); + num_keys = std::get<1>(keys).size(); + } else { + key_first = get_dataframe_buffer_begin(keys); + num_keys = size_dataframe_buffer(keys); + } + auto major_first = thrust_tuple_get_or_identity(key_first); + edge_partition.compute_number_of_edges_async( + major_first, + major_first + num_keys, + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + } + } + } + if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } + raft::update_host( + edge_partition_max_push_counts.data(), counters.data(), loop_count, handle.get_stream()); + handle.sync_stream(); + if (static_cast(minor_comm_rank / num_concurrent_loops) == + (i / num_concurrent_loops)) { + edge_partition_max_push_counts[minor_comm_rank % num_concurrent_loops] = local_max_pushes; + } } - rmm::device_uvector d_thresholds(h_thresholds.size(), handle.get_stream()); - raft::update_device( - d_thresholds.data(), 
h_thresholds.data(), h_thresholds.size(), handle.get_stream()); - rmm::device_uvector d_offsets(d_thresholds.size(), handle.get_stream()); - thrust::lower_bound(handle.get_thrust_policy(), - edge_partition_frontier_major_first, - edge_partition_frontier_major_last, - d_thresholds.begin(), - d_thresholds.end(), - d_offsets.begin()); - std::vector h_offsets(d_offsets.size()); - raft::update_host(h_offsets.data(), d_offsets.data(), d_offsets.size(), handle.get_stream()); - RAFT_CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - h_offsets.push_back(edge_partition_frontier_size); - // FIXME: we may further improve performance by 1) concurrently running kernels on different - // segments; 2) individually tuning block sizes for different segments; and 3) adding one - // more segment for very high degree vertices and running segmented reduction - if (h_offsets[0] > 0) { - raft::grid_1d_block_t update_grid(h_offsets[0], - extract_transform_v_frontier_e_kernel_block_size, - handle.get_device_properties().maxGridSize[0]); - extract_transform_v_frontier_e_high_degree - <<>>( - edge_partition, - edge_partition_frontier_key_first, - edge_partition_frontier_key_first + h_offsets[0], - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_e_mask, - get_optional_dataframe_buffer_begin(key_buffer), - get_optional_dataframe_buffer_begin(value_buffer), - buffer_idx.data(), - e_op); + } + + if (key_segment_offset_vectors) { + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = loop_stream_pool_indices + ? 
handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(partition_idx)); + + auto const& key_segment_offsets = (*key_segment_offset_vectors)[partition_idx]; + rmm::device_uvector high_segment_key_local_degree_offsets( + key_segment_offsets[1] + 1, loop_stream); + high_segment_key_local_degree_offsets.set_element_to_zero_async(0, loop_stream); + bool computed{false}; + if constexpr (try_bitmap) { + auto const& keys = edge_partition_key_buffers[j]; + if (keys.index() == 0) { + auto key_local_degree_first = thrust::make_transform_iterator( + std::get<0>(keys).begin(), + cuda::proclaim_return_type( + [edge_partition, + range_first = + local_frontier_range_firsts[partition_idx]] __device__(uint32_t v_offset) { + auto major = range_first + static_cast(v_offset); + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + return static_cast(edge_partition.local_degree(major_offset)); + })); + thrust::inclusive_scan(rmm::exec_policy_nosync(loop_stream), + key_local_degree_first, + key_local_degree_first + key_segment_offsets[1], + high_segment_key_local_degree_offsets.begin() + 1); + computed = true; + } + } + if (!computed) { + auto key_first = frontier_key_first; + if constexpr (GraphViewType::is_multi_gpu) { + auto const& keys = edge_partition_key_buffers[j]; + if constexpr (try_bitmap) { + assert(keys.index() == 1); + key_first = get_dataframe_buffer_begin(std::get<1>(keys)); + } else { + key_first = get_dataframe_buffer_begin(keys); + } + } + auto key_local_degree_first = thrust::make_transform_iterator( + key_first, cuda::proclaim_return_type([edge_partition] __device__(auto key) { + auto major = thrust_tuple_get_or_identity(key); + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + return static_cast(edge_partition.local_degree(major_offset)); + })); + 
thrust::inclusive_scan(rmm::exec_policy_nosync(loop_stream), + key_local_degree_first, + key_local_degree_first + key_segment_offsets[1], + high_segment_key_local_degree_offsets.begin() + 1); + } + raft::update_host((*high_segment_edge_counts).data() + j, + high_segment_key_local_degree_offsets.data() + key_segment_offsets[1], + 1, + loop_stream); + (*high_segment_key_local_degree_offset_vectors) + .push_back(std::move(high_segment_key_local_degree_offsets)); } - if (h_offsets[1] - h_offsets[0] > 0) { - raft::grid_1d_warp_t update_grid(h_offsets[1] - h_offsets[0], - extract_transform_v_frontier_e_kernel_block_size, - handle.get_device_properties().maxGridSize[0]); - extract_transform_v_frontier_e_mid_degree - <<>>( - edge_partition, - edge_partition_frontier_key_first + h_offsets[0], - edge_partition_frontier_key_first + h_offsets[1], - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_e_mask, - get_optional_dataframe_buffer_begin(key_buffer), - get_optional_dataframe_buffer_begin(value_buffer), - buffer_idx.data(), - e_op); + + // to ensure that *high_segment_edge_counts[] is valid + if (loop_stream_pool_indices) { + handle.sync_stream_pool(*loop_stream_pool_indices); + } else { + handle.sync_stream(); } - if (h_offsets[2] - h_offsets[1] > 0) { - raft::grid_1d_thread_t update_grid(h_offsets[2] - h_offsets[1], - extract_transform_v_frontier_e_kernel_block_size, - handle.get_device_properties().maxGridSize[0]); - extract_transform_v_frontier_e_hypersparse_or_low_degree - <<>>( - edge_partition, - edge_partition_frontier_key_first + h_offsets[1], - edge_partition_frontier_key_first + h_offsets[2], - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_e_mask, - get_optional_dataframe_buffer_begin(key_buffer), - get_optional_dataframe_buffer_begin(value_buffer), - buffer_idx.data(), - e_op); + } + + for (size_t j = 0; j < loop_count; ++j) { + 
auto loop_stream = loop_stream_pool_indices + ? handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + output_key_buffers.push_back(allocate_optional_dataframe_buffer( + edge_partition_max_push_counts[j], loop_stream)); + output_value_buffers.push_back(allocate_optional_dataframe_buffer( + edge_partition_max_push_counts[j], loop_stream)); + } + if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } + + thrust::fill( + handle.get_thrust_policy(), counters.begin(), counters.begin() + loop_count, size_t{0}); + if (loop_stream_pool_indices) { handle.sync_stream(); } + + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(partition_idx)); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, partition_idx) + : thrust::nullopt; + size_t num_streams_per_loop{1}; + if (stream_pool_indices) { + assert((*stream_pool_indices).size() >= num_concurrent_loops); + num_streams_per_loop = (*stream_pool_indices).size() / num_concurrent_loops; } - if (edge_partition.dcs_nzd_vertex_count() && (h_offsets[3] - h_offsets[2] > 0)) { - raft::grid_1d_thread_t update_grid(h_offsets[3] - h_offsets[2], - extract_transform_v_frontier_e_kernel_block_size, - handle.get_device_properties().maxGridSize[0]); - extract_transform_v_frontier_e_hypersparse_or_low_degree - <<>>( - edge_partition, - edge_partition_frontier_key_first + h_offsets[2], - edge_partition_frontier_key_first + h_offsets[3], - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_e_mask, - get_optional_dataframe_buffer_begin(key_buffer), - get_optional_dataframe_buffer_begin(value_buffer), - buffer_idx.data(), - e_op); + auto edge_partition_stream_pool_indices = + stream_pool_indices + ? 
std::make_optional>( + (*stream_pool_indices).data() + j * num_streams_per_loop, num_streams_per_loop) + : std::nullopt; + + edge_partition_src_input_device_view_t edge_partition_src_value_input{}; + edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; + if constexpr (GraphViewType::is_storage_transposed) { + edge_partition_src_value_input = + edge_partition_src_input_device_view_t(edge_src_value_input); + edge_partition_dst_value_input = + edge_partition_dst_input_device_view_t(edge_dst_value_input, partition_idx); + } else { + edge_partition_src_value_input = + edge_partition_src_input_device_view_t(edge_src_value_input, partition_idx); + edge_partition_dst_value_input = + edge_partition_dst_input_device_view_t(edge_dst_value_input); } - } else { - if (edge_partition_frontier_size > 0) { - raft::grid_1d_thread_t update_grid(edge_partition_frontier_size, - extract_transform_v_frontier_e_kernel_block_size, - handle.get_device_properties().maxGridSize[0]); - - extract_transform_v_frontier_e_hypersparse_or_low_degree - <<>>( + auto edge_partition_e_value_input = + edge_partition_e_input_device_view_t(edge_value_input, partition_idx); + + bool computed{false}; + if constexpr (try_bitmap) { + auto const& keys = edge_partition_key_buffers[j]; + if (keys.index() == 0) { + auto edge_partition_frontier_key_first = thrust::make_transform_iterator( + std::get<0>(keys).begin(), + cuda::proclaim_return_type( + [range_first = local_frontier_range_firsts[partition_idx]] __device__( + uint32_t v_offset) { return range_first + static_cast(v_offset); })); + auto edge_partition_frontier_key_last = + edge_partition_frontier_key_first + std::get<0>(keys).size(); + extract_transform_v_frontier_e_edge_partition( + handle, edge_partition, edge_partition_frontier_key_first, edge_partition_frontier_key_last, @@ -899,24 +1452,150 @@ extract_transform_v_frontier_e(raft::handle_t const& handle, edge_partition_dst_value_input, edge_partition_e_value_input, 
edge_partition_e_mask, - get_optional_dataframe_buffer_begin(key_buffer), - get_optional_dataframe_buffer_begin(value_buffer), - buffer_idx.data(), - e_op); + get_optional_dataframe_buffer_begin(output_key_buffers[j]), + get_optional_dataframe_buffer_begin(output_value_buffers[j]), + raft::device_span(counters.data() + j, size_t{1}), + e_op, + high_segment_key_local_degree_offset_vectors + ? std::make_optional>( + (*high_segment_key_local_degree_offset_vectors)[j].data(), + (*high_segment_key_local_degree_offset_vectors)[j].size()) + : std::nullopt, + high_segment_edge_counts ? std::make_optional((*high_segment_edge_counts)[j]) + : std::nullopt, + key_segment_offset_vectors ? std::make_optional>( + (*key_segment_offset_vectors)[partition_idx].data(), + (*key_segment_offset_vectors)[partition_idx].size()) + : std::nullopt, + edge_partition_stream_pool_indices); + computed = true; + } + } + if (!computed) { + auto edge_partition_frontier_key_first = frontier_key_first; + auto edge_partition_frontier_key_last = frontier_key_last; + if constexpr (GraphViewType::is_multi_gpu) { + auto const& keys = edge_partition_key_buffers[j]; + if constexpr (try_bitmap) { + assert(keys.index() == 1); + edge_partition_frontier_key_first = std::get<1>(keys).begin(); + edge_partition_frontier_key_last = std::get<1>(keys).end(); + } else { + edge_partition_frontier_key_first = get_dataframe_buffer_begin(keys); + edge_partition_frontier_key_last = get_dataframe_buffer_end(keys); + } + } + + extract_transform_v_frontier_e_edge_partition( + handle, + edge_partition, + edge_partition_frontier_key_first, + edge_partition_frontier_key_last, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + get_optional_dataframe_buffer_begin(output_key_buffers[j]), + get_optional_dataframe_buffer_begin(output_value_buffers[j]), + raft::device_span(counters.data() + j, size_t{1}), + e_op, + high_segment_key_local_degree_offset_vectors 
+ ? std::make_optional>( + (*high_segment_key_local_degree_offset_vectors)[j].data(), + (*high_segment_key_local_degree_offset_vectors)[j].size()) + : std::nullopt, + high_segment_edge_counts ? std::make_optional((*high_segment_edge_counts)[j]) + : std::nullopt, + key_segment_offset_vectors ? std::make_optional>( + (*key_segment_offset_vectors)[partition_idx].data(), + (*key_segment_offset_vectors)[partition_idx].size()) + : std::nullopt, + edge_partition_stream_pool_indices); } } - } - // 2. resize and return the buffers + if (stream_pool_indices) { handle.sync_stream_pool(*stream_pool_indices); } - auto new_buffer_size = buffer_idx.value(handle.get_stream()); + std::vector h_counts(loop_count); + raft::update_host(h_counts.data(), counters.data(), loop_count, handle.get_stream()); + handle.sync_stream(); - resize_optional_dataframe_buffer(key_buffer, new_buffer_size, handle.get_stream()); - shrink_to_fit_optional_dataframe_buffer(key_buffer, handle.get_stream()); + for (size_t j = 0; j < loop_count; ++j) { + auto loop_stream = loop_stream_pool_indices + ? 
handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); - resize_optional_dataframe_buffer( - value_buffer, new_buffer_size, handle.get_stream()); - shrink_to_fit_optional_dataframe_buffer(value_buffer, handle.get_stream()); + auto tmp_buffer_size = h_counts[j]; + if (tmp_buffer_size > 0) { + auto& tmp_key_buffer = output_key_buffers[j]; + auto& tmp_value_buffer = output_value_buffers[j]; + + resize_optional_dataframe_buffer( + tmp_key_buffer, tmp_buffer_size, loop_stream); + // skip shrink_to_fit before return to cut execution time + + resize_optional_dataframe_buffer( + tmp_value_buffer, tmp_buffer_size, loop_stream); + // skip shrink_to_fit before return to cut execution time + + key_buffers.push_back(std::move(tmp_key_buffer)); + value_buffers.push_back(std::move(tmp_value_buffer)); + } + } + if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } + } + + // 3. concatenate and return the buffers + + auto key_buffer = allocate_optional_dataframe_buffer(0, handle.get_stream()); + auto value_buffer = allocate_optional_dataframe_buffer(0, handle.get_stream()); + if (key_buffers.size() == 0) { + /* nothing to do */ + } else if (key_buffers.size() == 1) { + key_buffer = std::move(key_buffers[0]); + value_buffer = std::move(value_buffers[0]); + shrink_to_fit_optional_dataframe_buffer(key_buffer, handle.get_stream()); + shrink_to_fit_optional_dataframe_buffer(value_buffer, handle.get_stream()); + } else { + std::vector buffer_sizes(key_buffers.size()); + static_assert(!std::is_same_v || !std::is_same_v); + for (size_t i = 0; i < key_buffers.size(); ++i) { + if constexpr (!std::is_same_v) { + buffer_sizes[i] = size_optional_dataframe_buffer(key_buffers[i]); + } else { + buffer_sizes[i] = size_optional_dataframe_buffer(value_buffers[i]); + } + } + auto buffer_size = std::reduce(buffer_sizes.begin(), buffer_sizes.end()); + resize_optional_dataframe_buffer(key_buffer, buffer_size, handle.get_stream()); + 
resize_optional_dataframe_buffer( + value_buffer, buffer_size, handle.get_stream()); + std::vector buffer_displacements(buffer_sizes.size()); + std::exclusive_scan( + buffer_sizes.begin(), buffer_sizes.end(), buffer_displacements.begin(), size_t{0}); + handle.sync_stream(); + for (size_t i = 0; i < key_buffers.size(); ++i) { + auto loop_stream = loop_stream_pool_indices + ? handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[i]) + : handle.get_stream(); + if constexpr (!std::is_same_v) { + thrust::copy( + rmm::exec_policy_nosync(loop_stream), + get_optional_dataframe_buffer_cbegin(key_buffers[i]), + get_optional_dataframe_buffer_cend(key_buffers[i]), + get_optional_dataframe_buffer_begin(key_buffer) + buffer_displacements[i]); + } + + if constexpr (!std::is_same_v) { + thrust::copy(rmm::exec_policy_nosync(loop_stream), + get_optional_dataframe_buffer_cbegin(value_buffers[i]), + get_optional_dataframe_buffer_cend(value_buffers[i]), + get_optional_dataframe_buffer_begin(value_buffer) + + buffer_displacements[i]); + } + } + if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } + } return std::make_tuple(std::move(key_buffer), std::move(value_buffer)); } diff --git a/cpp/src/prims/detail/multi_stream_utils.cuh b/cpp/src/prims/detail/multi_stream_utils.cuh new file mode 100644 index 00000000000..76ef3fb0de4 --- /dev/null +++ b/cpp/src/prims/detail/multi_stream_utils.cuh @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include +#include + +#include +#include +#include + +#include + +#include +#include + +namespace cugraph { + +namespace detail { + +inline std::vector init_stream_pool_indices(size_t max_tmp_buffer_size, + size_t approx_tmp_buffer_size_per_loop, + size_t loop_count, + size_t num_streams_per_loop, + size_t max_streams) +{ + size_t num_streams = std::min(loop_count * num_streams_per_loop, + raft::round_down_safe(max_streams, num_streams_per_loop)); + + auto num_concurrent_loops = + (approx_tmp_buffer_size_per_loop > 0) + ? std::max(max_tmp_buffer_size / approx_tmp_buffer_size_per_loop, size_t{1}) + : loop_count; + num_streams = std::min(num_concurrent_loops * num_streams_per_loop, num_streams); + + std::vector stream_pool_indices(num_streams); + std::iota(stream_pool_indices.begin(), stream_pool_indices.end(), size_t{0}); + + return stream_pool_indices; +} + +// this assumes that the caller already knows how many items will be copied. 
+template +void copy_if_nosync(InputIterator input_first, + InputIterator input_last, + FlagIterator flag_first, + OutputIterator output_first, + raft::device_span count /* size = 1 */, + rmm::cuda_stream_view stream_view) +{ + CUGRAPH_EXPECTS( + static_cast(thrust::distance(input_first, input_last)) <= + static_cast(std::numeric_limits::max()), + "cugraph::detail::copy_if_nosync relies on cub::DeviceSelect::Flagged which uses int for input " + "size, but thrust::distance(input_first, input_last) exceeds std::numeric_limits::max()."); + + size_t tmp_storage_bytes{0}; + size_t input_size = static_cast(thrust::distance(input_first, input_last)); + + cub::DeviceSelect::Flagged(static_cast(nullptr), + tmp_storage_bytes, + input_first, + flag_first, + output_first, + count.data(), + input_size, + stream_view); + + auto d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, stream_view); + + cub::DeviceSelect::Flagged(d_tmp_storage.data(), + tmp_storage_bytes, + input_first, + flag_first, + output_first, + count.data(), + input_size, + stream_view); +} + +template +void count_nosync(InputIterator input_first, + InputIterator input_last, + raft::device_span count /* size = 1 */, + typename thrust::iterator_traits::value_type value, + rmm::cuda_stream_view stream_view) +{ + CUGRAPH_EXPECTS( + static_cast(thrust::distance(input_first, input_last)) <= + static_cast(std::numeric_limits::max()), + "cugraph::detail::count_nosync relies on cub::DeviceReduce::Sum which uses int for input size, " + "but thrust::distance(input_first, input_last) exceeds std::numeric_limits::max()."); + + size_t tmp_storage_bytes{0}; + size_t input_size = static_cast(thrust::distance(input_first, input_last)); + + cub::DeviceReduce::Sum(static_cast(nullptr), + tmp_storage_bytes, + input_first, + count.data(), + input_size, + stream_view); + + auto d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, stream_view); + + cub::DeviceReduce::Sum( + d_tmp_storage.data(), tmp_storage_bytes, input_first, 
count.data(), input_size, stream_view); +} + +template +void sum_nosync( + InputIterator input_first, + InputIterator input_last, + raft::device_span::value_type> sum /* size = 1 */, + rmm::cuda_stream_view stream_view) +{ + CUGRAPH_EXPECTS( + static_cast(thrust::distance(input_first, input_last)) <= + static_cast(std::numeric_limits::max()), + "cugraph::detail::count_nosync relies on cub::DeviceReduce::Sum which uses int for input size, " + "but thrust::distance(input_first, input_last) exceeds std::numeric_limits::max()."); + + size_t tmp_storage_bytes{0}; + size_t input_size = static_cast(thrust::distance(input_first, input_last)); + + cub::DeviceReduce::Sum(static_cast(nullptr), + tmp_storage_bytes, + input_first, + sum.data(), + input_size, + stream_view); + + auto d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, stream_view); + + cub::DeviceReduce::Sum( + d_tmp_storage.data(), tmp_storage_bytes, input_first, sum.data(), input_size, stream_view); +} + +} // namespace detail + +} // namespace cugraph diff --git a/cpp/src/prims/detail/optional_dataframe_buffer.hpp b/cpp/src/prims/detail/optional_dataframe_buffer.hpp index 87c095f8e81..6657b91f13b 100644 --- a/cpp/src/prims/detail/optional_dataframe_buffer.hpp +++ b/cpp/src/prims/detail/optional_dataframe_buffer.hpp @@ -26,152 +26,130 @@ namespace detail { // we cannot use thrust::iterator_traits::value_type if Iterator is void* (reference to // void is not allowed) template -struct optional_dataframe_buffer_value_type_t; +struct optional_dataframe_buffer_iterator_value_type_t; template -struct optional_dataframe_buffer_value_type_t>> { +struct optional_dataframe_buffer_iterator_value_type_t< + Iterator, + std::enable_if_t>> { using value = typename thrust::iterator_traits::value_type; }; template -struct optional_dataframe_buffer_value_type_t>> { +struct optional_dataframe_buffer_iterator_value_type_t< + Iterator, + std::enable_if_t>> { using value = void; }; -template >* = nullptr> -std::byte 
allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream) -{ - return std::byte{0}; // dummy -} - -template >* = nullptr> +template auto allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream) { - return allocate_dataframe_buffer(size, stream); + if constexpr (std::is_same_v) { + return std::byte{0}; // dummy + } else { + return allocate_dataframe_buffer(size, stream); + } } -template >* = nullptr> -void* get_optional_dataframe_buffer_begin(std::byte& optional_dataframe_buffer) -{ - return static_cast(nullptr); -} +template +struct optional_dataframe_buffer_type { + using type = decltype(allocate_optional_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{})); +}; -template >* = nullptr> -auto get_optional_dataframe_buffer_begin( - std::decay_t(size_t{0}, rmm::cuda_stream_view{}))>& - optional_dataframe_buffer) -{ - return get_dataframe_buffer_begin(optional_dataframe_buffer); -} +template +using optional_dataframe_buffer_type_t = typename optional_dataframe_buffer_type::type; -template >* = nullptr> -void* get_optional_dataframe_buffer_end(std::byte& optional_dataframe_buffer) +template +auto get_optional_dataframe_buffer_begin( + optional_dataframe_buffer_type_t& optional_dataframe_buffer) { - return static_cast(nullptr); + if constexpr (std::is_same_v) { + return static_cast(nullptr); + } else { + return get_dataframe_buffer_begin(optional_dataframe_buffer); + } } -template >* = nullptr> +template auto get_optional_dataframe_buffer_end( - std::decay_t(size_t{0}, rmm::cuda_stream_view{}))>& - optional_dataframe_buffer) + optional_dataframe_buffer_type_t& optional_dataframe_buffer) { - return get_dataframe_buffer_end(optional_dataframe_buffer); + if constexpr (std::is_same_v) { + return static_cast(nullptr); + } else { + return get_dataframe_buffer_end(optional_dataframe_buffer); + } } -template >* = nullptr> -void const* get_optional_dataframe_buffer_cbegin(std::byte const& optional_dataframe_buffer) -{ - return 
static_cast(nullptr); -} - -template >* = nullptr> +template auto get_optional_dataframe_buffer_cbegin( - std::decay_t(size_t{0}, rmm::cuda_stream_view{}))> const& - optional_dataframe_buffer) + optional_dataframe_buffer_type_t const& optional_dataframe_buffer) { - return get_dataframe_buffer_cbegin(optional_dataframe_buffer); + if constexpr (std::is_same_v) { + return static_cast(nullptr); + } else { + return get_dataframe_buffer_cbegin(optional_dataframe_buffer); + } } -template >* = nullptr> -void const* get_optional_dataframe_buffer_cend(std::byte const& optional_dataframe_buffer) -{ - return static_cast(nullptr); -} - -template >* = nullptr> +template auto get_optional_dataframe_buffer_cend( - std::decay_t(size_t{0}, rmm::cuda_stream_view{}))> const& - optional_dataframe_buffer) -{ - return get_dataframe_buffer_cend(optional_dataframe_buffer); -} - -template >* = nullptr> -void reserve_optional_dataframe_buffer(std::byte& optional_dataframe_buffer, - size_t new_buffer_capacity, - rmm::cuda_stream_view stream_view) + optional_dataframe_buffer_type_t const& optional_dataframe_buffer) { - return; + if constexpr (std::is_same_v) { + return static_cast(nullptr); + } else { + return get_dataframe_buffer_cend(optional_dataframe_buffer); + } } -template >* = nullptr> +template void reserve_optional_dataframe_buffer( - std::decay_t(size_t{0}, rmm::cuda_stream_view{}))>& - optional_dataframe_buffer, + optional_dataframe_buffer_type_t& optional_dataframe_buffer, size_t new_buffer_capacity, rmm::cuda_stream_view stream_view) { - return reserve_dataframe_buffer(optional_dataframe_buffer, new_buffer_capacity, stream_view); -} - -template >* = nullptr> -void resize_optional_dataframe_buffer(std::byte& optional_dataframe_buffer, - size_t new_buffer_size, - rmm::cuda_stream_view stream_view) -{ - return; + if constexpr (std::is_same_v) { + return; + } else { + return reserve_dataframe_buffer(optional_dataframe_buffer, new_buffer_capacity, stream_view); + } } -template >* = 
nullptr> +template void resize_optional_dataframe_buffer( - std::decay_t(size_t{0}, rmm::cuda_stream_view{}))>& - optional_dataframe_buffer, + optional_dataframe_buffer_type_t& optional_dataframe_buffer, size_t new_buffer_size, rmm::cuda_stream_view stream_view) { - return resize_dataframe_buffer(optional_dataframe_buffer, new_buffer_size, stream_view); + if constexpr (std::is_same_v) { + return; + } else { + return resize_dataframe_buffer(optional_dataframe_buffer, new_buffer_size, stream_view); + } } -template >* = nullptr> -void shrink_to_fit_optional_dataframe_buffer(std::byte& optional_dataframe_buffer, - rmm::cuda_stream_view stream_view) -{ - return; -} - -template >* = nullptr> +template void shrink_to_fit_optional_dataframe_buffer( - std::decay_t(size_t{0}, rmm::cuda_stream_view{}))>& - optional_dataframe_buffer, - rmm::cuda_stream_view stream_view) -{ - return shrink_to_fit_dataframe_buffer(optional_dataframe_buffer, stream_view); -} - -template >* = nullptr> -size_t size_optional_dataframe_buffer(std::byte const& optional_dataframe_buffer) + optional_dataframe_buffer_type_t& optional_dataframe_buffer, rmm::cuda_stream_view stream_view) { - return size_t{0}; + if constexpr (std::is_same_v) { + return; + } else { + return shrink_to_fit_dataframe_buffer(optional_dataframe_buffer, stream_view); + } } -template >* = nullptr> +template size_t size_optional_dataframe_buffer( - std::decay_t(size_t{0}, rmm::cuda_stream_view{}))> const& - optional_dataframe_buffer) + optional_dataframe_buffer_type_t& optional_dataframe_buffer) { - return size_dataframe_buffer(optional_dataframe_buffer); + if constexpr (std::is_same_v) { + return size_t{0}; + } else { + return size_dataframe_buffer(optional_dataframe_buffer); + } } } // namespace detail diff --git a/cpp/src/prims/detail/per_v_transform_reduce_e.cuh b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh new file mode 100644 index 00000000000..311b16e71ec --- /dev/null +++ 
b/cpp/src/prims/detail/per_v_transform_reduce_e.cuh @@ -0,0 +1,4374 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "detail/graph_partition_utils.cuh" +#include "prims/detail/multi_stream_utils.cuh" +#include "prims/detail/optional_dataframe_buffer.hpp" +#include "prims/detail/prim_functors.cuh" +#include "prims/detail/prim_utils.cuh" +#include "prims/fill_edge_src_dst_property.cuh" +#include "prims/property_op_utils.cuh" +#include "prims/reduce_op.cuh" +#include "prims/vertex_frontier.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace cugraph { + +namespace detail { + +int32_t constexpr per_v_transform_reduce_e_kernel_block_size = 512; +int32_t constexpr per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size = 128; + +template +struct iterator_value_type_or_default_t; + +template +struct iterator_value_type_or_default_t>> { + using value_type = default_t; // if Iterator is invalid (void*), value_type = default_t +}; + +template +struct iterator_value_type_or_default_t>> { + using value_type = typename thrust::iterator_traits< + 
Iterator>::value_type; // if iterator is valid, value_type = typename + // thrust::iterator_traits::value_type +}; + +template +__device__ auto init_pred_op( + edge_partition_device_view_t const& edge_partition, + EdgePartitionSrcValueInputWrapper const& edge_partition_src_value_input, + EdgePartitionDstValueInputWrapper const& edge_partition_dst_value_input, + EdgePartitionEdgeValueInputWrapper const& edge_partition_e_value_input, + PredOp const& pred_op, + key_t key, + typename GraphViewType::vertex_type major_offset, + typename GraphViewType::vertex_type const* indices, + typename GraphViewType::edge_type edge_offset) +{ + if constexpr (std::is_same_v< + PredOp, + const_true_e_op_t>) { + return call_const_true_e_op_t{}; + } else { + return call_e_op_t{edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + pred_op, + key, + major_offset, + indices, + edge_offset}; + } +} + +template +struct transform_and_atomic_reduce_t { + edge_partition_device_view_t const& edge_partition{}; + vertex_t const* indices{nullptr}; + TransformOp const& transform_op{}; + PredOp const& pred_op{}; + ResultValueOutputIteratorOrWrapper& result_value_output{}; + + __device__ void operator()(edge_t i) const + { + if (pred_op(i)) { + auto e_op_result = transform_op(i); + auto minor = indices[i]; + auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); + if constexpr (multi_gpu) { + reduce_op::atomic_reduce(result_value_output, minor_offset, e_op_result); + } else { + reduce_op::atomic_reduce(result_value_output + minor_offset, e_op_result); + } + } + } +}; + +template +__device__ void update_result_value_output( + edge_partition_device_view_t const& edge_partition, + vertex_t const* indices, + edge_t local_degree, + TransformOp const& transform_op, + result_t init, + ReduceOp const& reduce_op, + PredOp const& pred_op, + size_t output_idx /* relevent only when update_major === true */, + 
ResultValueOutputIteratorOrWrapper& result_value_output) +{ + if constexpr (update_major) { + result_t val{}; + if constexpr (std::is_same_v>) { + if constexpr (std::is_same_v>) { // init is selected only when no + // edges return a valid value + val = init; + for (edge_t i = 0; i < local_degree; ++i) { + auto tmp = transform_op(i); + val = tmp; + break; + } + } else { + val = thrust::transform_reduce(thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + transform_op, + init, + reduce_op); + } + } else { + val = init; + for (edge_t i = 0; i < local_degree; ++i) { + if (pred_op(i)) { + auto tmp = transform_op(i); + if constexpr (std::is_same_v>) { // init is selected only when + // no edges return a valid + // value + val = tmp; + break; + } else { + val = reduce_op(val, tmp); + } + } + } + } + *(result_value_output + output_idx) = val; + } else { + thrust::for_each(thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + transform_and_atomic_reduce_t{ + edge_partition, indices, transform_op, pred_op, result_value_output}); + } +} + +template +__global__ static void per_v_transform_reduce_e_hypersparse( + edge_partition_device_view_t edge_partition, + OptionalKeyIterator key_first, + OptionalKeyIterator key_last, + EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, + EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, + EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, + thrust::optional edge_partition_e_mask, + ResultValueOutputIteratorOrWrapper result_value_output, + EdgeOp e_op, + T init /* relevant only if update_major == true */, + ReduceOp reduce_op, + PredOp pred_op) +{ + constexpr bool use_input_key = !std::is_same_v; + static_assert(update_major || reduce_op::has_compatible_raft_comms_op_v< + ReduceOp>); // atomic_reduce is defined only when + // has_compatible_raft_comms_op_t is true + 
static_assert(update_major || !use_input_key); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using key_t = + typename iterator_value_type_or_default_t::value_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto idx = static_cast(tid); + + size_t key_count{}; + if constexpr (use_input_key) { + key_count = static_cast(thrust::distance(key_first, key_last)); + } else { + key_count = *(edge_partition.dcs_nzd_vertex_count()); + } + + while (idx < key_count) { + key_t key{}; + vertex_t major{}; + thrust::optional major_idx{}; + if constexpr (use_input_key) { + key = *(key_first + idx); + major = thrust_tuple_get_or_identity(key); + major_idx = edge_partition.major_idx_from_major_nocheck(major); + } else { + key = *(edge_partition.major_from_major_hypersparse_idx_nocheck(static_cast(idx))); + major = key; + auto major_start_offset = static_cast(*(edge_partition.major_hypersparse_first()) - + edge_partition.major_range_first()); + major_idx = major_start_offset + idx; // major_offset != major_idx in the hypersparse region + } + + size_t output_idx = use_input_key ? 
idx : (major - *(edge_partition).major_hypersparse_first()); + if (major_idx) { + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + vertex_t const* indices{nullptr}; + edge_t edge_offset{}; + edge_t local_degree{}; + thrust::tie(indices, edge_offset, local_degree) = + edge_partition.local_edges(static_cast(*major_idx)); + + auto call_e_op = call_e_op_t{edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + e_op, + key, + major_offset, + indices, + edge_offset}; + + auto call_pred_op = init_pred_op(edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + pred_op, + key, + major_offset, + indices, + edge_offset); + + if (edge_partition_e_mask) { + update_result_value_output( + edge_partition, + indices, + local_degree, + call_e_op, + init, + reduce_op, + [&edge_partition_e_mask, &call_pred_op, edge_offset] __device__(edge_t i) { + if ((*edge_partition_e_mask).get(edge_offset + i)) { + return call_pred_op(i); + } else { + return false; + } + }, + output_idx, + result_value_output); + } else { + update_result_value_output(edge_partition, + indices, + local_degree, + call_e_op, + init, + reduce_op, + call_pred_op, + output_idx, + result_value_output); + } + } else { + if constexpr (update_major) { *(result_value_output + output_idx) = init; } + } + idx += gridDim.x * blockDim.x; + } +} + +template +__global__ static void per_v_transform_reduce_e_low_degree( + edge_partition_device_view_t edge_partition, + KeyIterator key_first, + KeyIterator key_last, + EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, + EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, + EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, + thrust::optional edge_partition_e_mask, + ResultValueOutputIteratorOrWrapper result_value_output, + EdgeOp e_op, + T init /* relevant only if update_major == true */, + 
ReduceOp reduce_op, + PredOp pred_op) +{ + static_assert(update_major || reduce_op::has_compatible_raft_comms_op_v< + ReduceOp>); // atomic_reduce is defined only when + // has_compatible_raft_comms_op_t is true + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using key_t = typename thrust::iterator_traits::value_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto idx = static_cast(tid); + + while (idx < static_cast(thrust::distance(key_first, key_last))) { + auto key = *(key_first + idx); + auto major = thrust_tuple_get_or_identity(key); + + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + vertex_t const* indices{nullptr}; + edge_t edge_offset{}; + edge_t local_degree{}; + thrust::tie(indices, edge_offset, local_degree) = + edge_partition.local_edges(static_cast(major_offset)); + + auto call_e_op = call_e_op_t{edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + e_op, + key, + major_offset, + indices, + edge_offset}; + + auto call_pred_op = init_pred_op(edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + pred_op, + key, + major_offset, + indices, + edge_offset); + + if (edge_partition_e_mask) { + update_result_value_output( + edge_partition, + indices, + local_degree, + call_e_op, + init, + reduce_op, + [&edge_partition_e_mask, &call_pred_op, edge_offset] __device__(edge_t i) { + if ((*edge_partition_e_mask).get(edge_offset + i)) { + return call_pred_op(i); + } else { + return false; + } + }, + idx, + result_value_output); + } else { + update_result_value_output(edge_partition, + indices, + local_degree, + call_e_op, + init, + reduce_op, + call_pred_op, + idx, + result_value_output); + } + idx += gridDim.x * blockDim.x; + } +} + +template +__global__ static void per_v_transform_reduce_e_mid_degree( + 
edge_partition_device_view_t edge_partition, + KeyIterator key_first, + KeyIterator key_last, + EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, + EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, + EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, + thrust::optional edge_partition_e_mask, + ResultValueOutputIteratorOrWrapper result_value_output, + EdgeOp e_op, + T init /* relevant only if update_major == true */, + T identity_element /* relevant only if update_major == true */, + ReduceOp reduce_op, + PredOp pred_op) +{ + static_assert(update_major || reduce_op::has_compatible_raft_comms_op_v< + ReduceOp>); // atomic_reduce is defined only when + // has_compatible_raft_comms_op_t is true + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using e_op_result_t = T; + using key_t = typename thrust::iterator_traits::value_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(per_v_transform_reduce_e_kernel_block_size % raft::warp_size() == 0); + auto const lane_id = tid % raft::warp_size(); + auto idx = static_cast(tid / raft::warp_size()); + + using WarpReduce = cub::WarpReduce< + std::conditional_t>, int32_t, e_op_result_t>>; + [[maybe_unused]] __shared__ + std::conditional_t + temp_storage[update_major ? 
(per_v_transform_reduce_e_kernel_block_size / raft::warp_size()) + : int32_t{1} /* dummy */]; + + while (idx < static_cast(thrust::distance(key_first, key_last))) { + auto key = *(key_first + idx); + auto major = thrust_tuple_get_or_identity(key); + + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + vertex_t const* indices{nullptr}; + edge_t edge_offset{}; + edge_t local_degree{}; + thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_offset); + + auto call_e_op = call_e_op_t{edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + e_op, + key, + major_offset, + indices, + edge_offset}; + + auto call_pred_op = init_pred_op(edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + pred_op, + key, + major_offset, + indices, + edge_offset); + + [[maybe_unused]] std::conditional_t + reduced_e_op_result{}; + [[maybe_unused]] std::conditional_t>, + int32_t, + std::byte /* dummy */> + first_valid_lane_id{}; + if constexpr (update_major) { + reduced_e_op_result = + (lane_id == 0) ? init : identity_element; // init == identity_element for reduce_op::any + if constexpr (std::is_same_v>) { + first_valid_lane_id = raft::warp_size(); + } + } + + if (edge_partition_e_mask) { + if constexpr (update_major && std::is_same_v>) { + auto rounded_up_local_degree = + ((static_cast(local_degree) + (raft::warp_size() - 1)) / raft::warp_size()) * + raft::warp_size(); + for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) { + thrust::optional e_op_result{thrust::nullopt}; + if ((i < static_cast(local_degree)) && + (*edge_partition_e_mask).get(edge_offset + i) && call_pred_op(i)) { + e_op_result = call_e_op(i); + } + first_valid_lane_id = WarpReduce(temp_storage[threadIdx.x / raft::warp_size()]) + .Reduce(e_op_result ? 
lane_id : raft::warp_size(), cub::Min()); + first_valid_lane_id = __shfl_sync(raft::warp_full_mask(), first_valid_lane_id, int{0}); + if (lane_id == first_valid_lane_id) { reduced_e_op_result = *e_op_result; } + if (first_valid_lane_id != raft::warp_size()) { break; } + } + } else { + for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { + if ((*edge_partition_e_mask).get(edge_offset + i) & call_pred_op(i)) { + auto e_op_result = call_e_op(i); + if constexpr (update_major) { + reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result); + } else { + auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(indices[i]); + if constexpr (GraphViewType::is_multi_gpu) { + reduce_op::atomic_reduce(result_value_output, minor_offset, e_op_result); + } else { + reduce_op::atomic_reduce(result_value_output + minor_offset, e_op_result); + } + } + } + } + } + } else { + if constexpr (update_major && std::is_same_v>) { + auto rounded_up_local_degree = + ((static_cast(local_degree) + (raft::warp_size() - 1)) / raft::warp_size()) * + raft::warp_size(); + for (size_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) { + thrust::optional e_op_result{thrust::nullopt}; + if (i < static_cast(local_degree) && call_pred_op(i)) { + e_op_result = call_e_op(i); + } + first_valid_lane_id = WarpReduce(temp_storage[threadIdx.x / raft::warp_size()]) + .Reduce(e_op_result ? 
lane_id : raft::warp_size(), cub::Min()); + first_valid_lane_id = __shfl_sync(raft::warp_full_mask(), first_valid_lane_id, int{0}); + if (lane_id == first_valid_lane_id) { reduced_e_op_result = *e_op_result; } + if (first_valid_lane_id != raft::warp_size()) { break; } + } + } else { + for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { + if (call_pred_op(i)) { + auto e_op_result = call_e_op(i); + if constexpr (update_major) { + reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result); + } else { + auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(indices[i]); + if constexpr (GraphViewType::is_multi_gpu) { + reduce_op::atomic_reduce(result_value_output, minor_offset, e_op_result); + } else { + reduce_op::atomic_reduce(result_value_output + minor_offset, e_op_result); + } + } + } + } + } + } + + if constexpr (update_major) { + if constexpr (std::is_same_v>) { + if (lane_id == ((first_valid_lane_id == raft::warp_size()) ? 0 : first_valid_lane_id)) { + *(result_value_output + idx) = reduced_e_op_result; + } + } else { + reduced_e_op_result = WarpReduce(temp_storage[threadIdx.x / raft::warp_size()]) + .Reduce(reduced_e_op_result, reduce_op); + if (lane_id == 0) { *(result_value_output + idx) = reduced_e_op_result; } + } + } + + idx += gridDim.x * (blockDim.x / raft::warp_size()); + } +} + +template +__global__ static void per_v_transform_reduce_e_high_degree( + edge_partition_device_view_t edge_partition, + KeyIterator key_first, + KeyIterator key_last, + EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, + EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, + EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, + thrust::optional edge_partition_e_mask, + ResultValueOutputIteratorOrWrapper result_value_output, + EdgeOp e_op, + T init /* relevant only if update_major == true */, + T identity_element /* relevant only if update_major == true */, + ReduceOp reduce_op, + PredOp pred_op) +{ + 
static_assert(update_major || reduce_op::has_compatible_raft_comms_op_v< + ReduceOp>); // atomic_reduce is defined only when + // has_compatible_raft_comms_op_t is true + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using e_op_result_t = T; + using key_t = typename thrust::iterator_traits::value_type; + + auto idx = static_cast(blockIdx.x); + + using BlockReduce = cub::BlockReduce< + std::conditional_t>, int32_t, e_op_result_t>, + std::is_same_v> + ? per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size + : per_v_transform_reduce_e_kernel_block_size>; + [[maybe_unused]] __shared__ + std::conditional_t + temp_storage; + [[maybe_unused]] __shared__ + std::conditional_t>, + int32_t, + std::byte /* dummy */> + output_thread_id; + + while (idx < static_cast(thrust::distance(key_first, key_last))) { + auto key = *(key_first + idx); + auto major = thrust_tuple_get_or_identity(key); + + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + vertex_t const* indices{nullptr}; + edge_t edge_offset{}; + edge_t local_degree{}; + thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_offset); + + auto call_e_op = call_e_op_t{edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + e_op, + key, + major_offset, + indices, + edge_offset}; + + auto call_pred_op = init_pred_op(edge_partition, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + pred_op, + key, + major_offset, + indices, + edge_offset); + + [[maybe_unused]] std::conditional_t + reduced_e_op_result{}; + [[maybe_unused]] std::conditional_t>, + int32_t, + std::byte /* dummy */> + first_valid_thread_id{}; + if constexpr (update_major) { + reduced_e_op_result = threadIdx.x == 0 + ? 
init + : identity_element; // init == identity_element for reduce_op::any + if constexpr (std::is_same_v>) { + first_valid_thread_id = per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size; + } + } + + if (edge_partition_e_mask) { + if constexpr (update_major && std::is_same_v>) { + auto rounded_up_local_degree = + ((static_cast(local_degree) + + (per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size - 1)) / + per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size) * + per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size; + for (size_t i = threadIdx.x; i < rounded_up_local_degree; i += blockDim.x) { + thrust::optional e_op_result{thrust::nullopt}; + if ((i < static_cast(local_degree)) && + (*edge_partition_e_mask).get(edge_offset + i) && call_pred_op(i)) { + e_op_result = call_e_op(i); + } + first_valid_thread_id = + BlockReduce(temp_storage) + .Reduce(e_op_result + ? threadIdx.x + : per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size, + cub::Min()); + if (threadIdx.x == 0) { output_thread_id = first_valid_thread_id; } + __syncthreads(); + first_valid_thread_id = output_thread_id; + if (threadIdx.x == first_valid_thread_id) { reduced_e_op_result = *e_op_result; } + if (first_valid_thread_id != + per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size) { + break; + } + } + } else { + for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { + if ((*edge_partition_e_mask).get(edge_offset + i) && call_pred_op(i)) { + auto e_op_result = call_e_op(i); + if constexpr (update_major) { + reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result); + } else { + auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(indices[i]); + if constexpr (GraphViewType::is_multi_gpu) { + reduce_op::atomic_reduce(result_value_output, minor_offset, e_op_result); + } else { + reduce_op::atomic_reduce(result_value_output + minor_offset, e_op_result); + } + } + } + } + } + } else { + if 
constexpr (update_major && std::is_same_v>) { + auto rounded_up_local_degree = + ((static_cast(local_degree) + + (per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size - 1)) / + per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size) * + per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size; + for (size_t i = threadIdx.x; i < rounded_up_local_degree; i += blockDim.x) { + thrust::optional e_op_result{thrust::nullopt}; + if ((i < static_cast(local_degree)) && call_pred_op(i)) { + e_op_result = call_e_op(i); + } + first_valid_thread_id = + BlockReduce(temp_storage) + .Reduce(e_op_result + ? threadIdx.x + : per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size, + cub::Min()); + if (threadIdx.x == 0) { output_thread_id = first_valid_thread_id; } + __syncthreads(); + first_valid_thread_id = output_thread_id; + if (threadIdx.x == first_valid_thread_id) { reduced_e_op_result = *e_op_result; } + if (first_valid_thread_id != + per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size) { + break; + } + } + } else { + for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { + if (call_pred_op(i)) { + auto e_op_result = call_e_op(i); + if constexpr (update_major) { + reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result); + } else { + auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(indices[i]); + if constexpr (GraphViewType::is_multi_gpu) { + reduce_op::atomic_reduce(result_value_output, minor_offset, e_op_result); + } else { + reduce_op::atomic_reduce(result_value_output + minor_offset, e_op_result); + } + } + } + } + } + } + + if constexpr (update_major) { + if constexpr (std::is_same_v>) { + if (threadIdx.x == ((first_valid_thread_id == + per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size) + ? 
0 + : first_valid_thread_id)) { + *(result_value_output + idx) = reduced_e_op_result; + } + } else { + reduced_e_op_result = BlockReduce(temp_storage).Reduce(reduced_e_op_result, reduce_op); + if (threadIdx.x == 0) { *(result_value_output + idx) = reduced_e_op_result; } + } + } + + idx += gridDim.x; + } +} + +template +void compute_priorities( + raft::comms::comms_t const& comm, + ValueIterator value_first, + raft::device_span priorities, + std::optional, raft::device_span>> + hypersparse_key_offsets, // we may not have values for the entire "range_size" if + // hypersparse_key_offsets.has_value() is true + size_t contiguous_size, + int root, + int subgroup_size /* faster interconnect within a subgroup */, + typename thrust::iterator_traits::value_type init, + bool ignore_local_values, + rmm::cuda_stream_view stream_view) +{ + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + // For each vertex, select a comm_rank among the GPUs with a value other than init (if there are + // more than one, the GPU with (comm_rank == root) has the highest priority, GPUs in the same DGX + // node should be the next) + + if (ignore_local_values) { + thrust::fill(rmm::exec_policy_nosync(stream_view), + priorities.begin(), + priorities.end(), + std::numeric_limits::max()); + } else { + thrust::tabulate( + rmm::exec_policy_nosync(stream_view), + priorities.begin(), + priorities.begin() + contiguous_size, + [value_first, root, subgroup_size, init, comm_rank, comm_size] __device__(auto offset) { + auto val = *(value_first + offset); + return (val != init) + ? 
rank_to_priority( + comm_rank, root, subgroup_size, comm_size, static_cast(offset)) + : std::numeric_limits::max(); // lowest priority + }); + if (hypersparse_key_offsets) { + thrust::fill(rmm::exec_policy_nosync(stream_view), + priorities.begin() + contiguous_size, + priorities.end(), + std::numeric_limits::max()); + if ((*hypersparse_key_offsets).index() == 0) { + auto priority_first = thrust::make_transform_iterator( + std::get<0>(*hypersparse_key_offsets).begin(), + cuda::proclaim_return_type( + [root, subgroup_size, comm_rank, comm_size] __device__(uint32_t offset) { + return rank_to_priority( + comm_rank, root, subgroup_size, comm_size, static_cast(offset)); + })); + thrust::scatter_if( + rmm::exec_policy_nosync(stream_view), + priority_first, + priority_first + std::get<0>(*hypersparse_key_offsets).size(), + std::get<0>(*hypersparse_key_offsets).begin(), + value_first + contiguous_size, + priorities.begin(), + is_not_equal_t::value_type>{init}); + } else { + auto priority_first = thrust::make_transform_iterator( + std::get<1>(*hypersparse_key_offsets).begin(), + cuda::proclaim_return_type( + [root, subgroup_size, comm_rank, comm_size] __device__(size_t offset) { + return rank_to_priority( + comm_rank, root, subgroup_size, comm_size, static_cast(offset)); + })); + thrust::scatter_if( + rmm::exec_policy_nosync(stream_view), + priority_first, + priority_first + std::get<1>(*hypersparse_key_offsets).size(), + std::get<1>(*hypersparse_key_offsets).begin(), + value_first + contiguous_size, + priorities.begin(), + is_not_equal_t::value_type>{init}); + } + } + } +} + +// return selected ranks if root. +// otherwise, it is sufficient to just return bool flags indiciating whether this rank's values are +// selected or not. 
+template +std::variant, + int, + priority_t>> /* root, store selected ranks */, + std::optional> /* store bitmap */> +compute_selected_ranks_from_priorities( + raft::comms::comms_t const& comm, + raft::device_span priorities, + std::optional, raft::device_span>> + hypersparse_key_offsets, // we may not have values for the entire "range_size" if + // hypersparse_key_offsets.has_value() is true + size_t contiguous_size, + int root, + int subgroup_size /* faster interconnect within a subgroup */, + bool ignore_local_values, + rmm::cuda_stream_view stream_view) +{ + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + using rank_t = std::conditional_t, int, priority_t>; + + if (comm_rank == root) { + rmm::device_uvector selected_ranks(priorities.size(), stream_view); + auto offset_priority_pair_first = + thrust::make_zip_iterator(thrust::make_counting_iterator(vertex_t{0}), priorities.begin()); + thrust::transform(rmm::exec_policy_nosync(stream_view), + offset_priority_pair_first, + offset_priority_pair_first + priorities.size(), + selected_ranks.begin(), + [root, subgroup_size, comm_rank, comm_size] __device__(auto pair) { + auto offset = thrust::get<0>(pair); + auto priority = thrust::get<1>(pair); + auto rank = (priority == std::numeric_limits::max()) + ? comm_size + : priority_to_rank( + priority, root, subgroup_size, comm_size, offset); + return static_cast(rank); + }); + return selected_ranks; + } else { + std::optional> keep_flags{std::nullopt}; + if (!ignore_local_values) { + keep_flags = rmm::device_uvector( + packed_bool_size(hypersparse_key_offsets + ? (contiguous_size + ((*hypersparse_key_offsets).index() == 0 + ? 
std::get<0>(*hypersparse_key_offsets).size() + : std::get<1>(*hypersparse_key_offsets).size())) + : contiguous_size), + stream_view); + thrust::fill(rmm::exec_policy_nosync(stream_view), + (*keep_flags).begin(), + (*keep_flags).end(), + packed_bool_empty_mask()); + auto offset_priority_pair_first = + thrust::make_zip_iterator(thrust::make_counting_iterator(vertex_t{0}), priorities.begin()); + thrust::for_each( + rmm::exec_policy_nosync(stream_view), + offset_priority_pair_first, + offset_priority_pair_first + contiguous_size, + [keep_flags = raft::device_span((*keep_flags).data(), (*keep_flags).size()), + root, + subgroup_size, + comm_rank, + comm_size] __device__(auto pair) { + auto offset = thrust::get<0>(pair); + auto priority = thrust::get<1>(pair); + auto rank = (priority == std::numeric_limits::max()) + ? comm_size + : priority_to_rank( + priority, root, subgroup_size, comm_size, offset); + if (rank == comm_rank) { + cuda::atomic_ref word( + keep_flags[packed_bool_offset(offset)]); + word.fetch_or(packed_bool_mask(offset), cuda::std::memory_order_relaxed); + } + }); + if (hypersparse_key_offsets) { + if ((*hypersparse_key_offsets).index() == 0) { + auto pair_first = + thrust::make_zip_iterator(thrust::make_counting_iterator(size_t{contiguous_size}), + std::get<0>(*hypersparse_key_offsets).begin()); + thrust::for_each( + rmm::exec_policy_nosync(stream_view), + pair_first, + pair_first + std::get<0>(*hypersparse_key_offsets).size(), + [priorities = raft::device_span(priorities.data(), priorities.size()), + keep_flags = raft::device_span((*keep_flags).data(), (*keep_flags).size()), + root, + subgroup_size, + comm_rank, + comm_size] __device__(auto pair) { + auto offset = thrust::get<1>(pair); + auto priority = priorities[offset]; + auto rank = + (priority == std::numeric_limits::max()) + ? 
comm_size + : priority_to_rank( + priority, root, subgroup_size, comm_size, static_cast(offset)); + if (rank == comm_rank) { + cuda::atomic_ref word( + keep_flags[packed_bool_offset(thrust::get<0>(pair))]); + word.fetch_or(packed_bool_mask(thrust::get<0>(pair)), + cuda::std::memory_order_relaxed); + } + }); + } else { + auto pair_first = + thrust::make_zip_iterator(thrust::make_counting_iterator(size_t{contiguous_size}), + std::get<1>(*hypersparse_key_offsets).begin()); + thrust::for_each( + rmm::exec_policy_nosync(stream_view), + pair_first, + pair_first + std::get<1>(*hypersparse_key_offsets).size(), + [priorities = raft::device_span(priorities.data(), priorities.size()), + keep_flags = raft::device_span((*keep_flags).data(), (*keep_flags).size()), + root, + subgroup_size, + comm_rank, + comm_size] __device__(auto pair) { + auto offset = thrust::get<1>(pair); + auto priority = priorities[offset]; + auto rank = + (priority == std::numeric_limits::max()) + ? comm_size + : priority_to_rank( + priority, root, subgroup_size, comm_size, static_cast(offset)); + if (rank == comm_rank) { + cuda::atomic_ref word( + keep_flags[packed_bool_offset(thrust::get<0>(pair))]); + word.fetch_or(packed_bool_mask(thrust::get<0>(pair)), + cuda::std::memory_order_relaxed); + } + }); + } + } + } + return keep_flags; + } +} + +template +void per_v_transform_reduce_e_edge_partition( + raft::handle_t const& handle, + edge_partition_device_view_t edge_partition, + OptionalKeyIterator edge_partition_key_first, + OptionalKeyIterator edge_partition_key_last, + EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, + EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, + EdgePartitionValueInputWrapper edge_partition_e_value_input, + thrust::optional edge_partition_e_mask, + ResultValueOutputIteratorOrWrapper output_buffer, + EdgeOp e_op, + T major_init, + T major_identity_element, + ReduceOp reduce_op, + PredOp pred_op, + std::optional> key_segment_offsets, + 
std::optional> const& edge_partition_stream_pool_indices) +{ + constexpr bool use_input_key = !std::is_same_v; + + using vertex_t = typename GraphViewType::vertex_type; + using segment_key_iterator_t = + std::conditional_t; + + size_t stream_pool_size{0}; + if (edge_partition_stream_pool_indices) { + stream_pool_size = (*edge_partition_stream_pool_indices).size(); + } + if (key_segment_offsets) { + static_assert(detail::num_sparse_segments_per_vertex_partition == 3); + + if (edge_partition.dcs_nzd_vertex_count()) { + auto exec_stream = edge_partition_stream_pool_indices + ? handle.get_stream_from_stream_pool( + (*edge_partition_stream_pool_indices)[0 % stream_pool_size]) + : handle.get_stream(); + + if constexpr (update_major && !use_input_key) { // this is necessary as we don't visit + // every vertex in the hypersparse segment + thrust::fill(rmm::exec_policy_nosync(exec_stream), + output_buffer + (*key_segment_offsets)[3], + output_buffer + (*key_segment_offsets)[4], + major_init); + } + + auto segment_size = use_input_key + ? 
((*key_segment_offsets)[4] - (*key_segment_offsets)[3]) + : static_cast(*(edge_partition.dcs_nzd_vertex_count())); + if (segment_size > 0) { + raft::grid_1d_thread_t update_grid(segment_size, + detail::per_v_transform_reduce_e_kernel_block_size, + handle.get_device_properties().maxGridSize[0]); + auto segment_output_buffer = output_buffer; + if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[3]; } + auto segment_key_first = edge_partition_key_first; + auto segment_key_last = edge_partition_key_last; + if constexpr (use_input_key) { + segment_key_first += (*key_segment_offsets)[3]; + segment_key_last = + segment_key_first + ((*key_segment_offsets)[4] - (*key_segment_offsets)[3]); + } else { + assert(segment_key_first == nullptr); + assert(segment_key_last == nullptr); + } + detail::per_v_transform_reduce_e_hypersparse + <<>>( + edge_partition, + segment_key_first, + segment_key_last, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + segment_output_buffer, + e_op, + major_init, + reduce_op, + pred_op); + } + } + if ((*key_segment_offsets)[3] - (*key_segment_offsets)[2]) { + auto exec_stream = edge_partition_stream_pool_indices + ? 
handle.get_stream_from_stream_pool( + (*edge_partition_stream_pool_indices)[1 % stream_pool_size]) + : handle.get_stream(); + raft::grid_1d_thread_t update_grid((*key_segment_offsets)[3] - (*key_segment_offsets)[2], + detail::per_v_transform_reduce_e_kernel_block_size, + handle.get_device_properties().maxGridSize[0]); + auto segment_output_buffer = output_buffer; + if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[2]; } + std::optional + segment_key_first{}; // std::optional as thrust::transform_iterator's default constructor + // is a deleted function, segment_key_first should always have a value + if constexpr (use_input_key) { + segment_key_first = edge_partition_key_first; + } else { + segment_key_first = thrust::make_counting_iterator(edge_partition.major_range_first()); + } + *segment_key_first += (*key_segment_offsets)[2]; + detail::per_v_transform_reduce_e_low_degree + <<>>( + edge_partition, + *segment_key_first, + *segment_key_first + ((*key_segment_offsets)[3] - (*key_segment_offsets)[2]), + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + segment_output_buffer, + e_op, + major_init, + reduce_op, + pred_op); + } + if ((*key_segment_offsets)[2] - (*key_segment_offsets)[1] > 0) { + auto exec_stream = edge_partition_stream_pool_indices + ? 
handle.get_stream_from_stream_pool( + (*edge_partition_stream_pool_indices)[2 % stream_pool_size]) + : handle.get_stream(); + raft::grid_1d_warp_t update_grid((*key_segment_offsets)[2] - (*key_segment_offsets)[1], + detail::per_v_transform_reduce_e_kernel_block_size, + handle.get_device_properties().maxGridSize[0]); + auto segment_output_buffer = output_buffer; + if constexpr (update_major) { segment_output_buffer += (*key_segment_offsets)[1]; } + std::optional + segment_key_first{}; // std::optional as thrust::transform_iterator's default constructor + // is a deleted function, segment_key_first should always have a value + if constexpr (use_input_key) { + segment_key_first = edge_partition_key_first; + } else { + segment_key_first = thrust::make_counting_iterator(edge_partition.major_range_first()); + } + *segment_key_first += (*key_segment_offsets)[1]; + detail::per_v_transform_reduce_e_mid_degree + <<>>( + edge_partition, + *segment_key_first, + *segment_key_first + ((*key_segment_offsets)[2] - (*key_segment_offsets)[1]), + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + segment_output_buffer, + e_op, + major_init, + major_identity_element, + reduce_op, + pred_op); + } + if ((*key_segment_offsets)[1] > 0) { + auto exec_stream = edge_partition_stream_pool_indices + ? handle.get_stream_from_stream_pool( + (*edge_partition_stream_pool_indices)[3 % stream_pool_size]) + : handle.get_stream(); + raft::grid_1d_block_t update_grid( + (*key_segment_offsets)[1], + std::is_same_v> + ? 
detail::per_v_transform_reduce_e_kernel_high_degree_reduce_any_block_size + : detail::per_v_transform_reduce_e_kernel_block_size, + handle.get_device_properties().maxGridSize[0]); + std::optional + segment_key_first{}; // std::optional as thrust::transform_iterator's default constructor + // is a deleted function, segment_key_first should always have a value + if constexpr (use_input_key) { + segment_key_first = edge_partition_key_first; + } else { + segment_key_first = thrust::make_counting_iterator(edge_partition.major_range_first()); + } + detail::per_v_transform_reduce_e_high_degree + <<>>( + edge_partition, + *segment_key_first, + *segment_key_first + (*key_segment_offsets)[1], + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + output_buffer, + e_op, + major_init, + major_identity_element, + reduce_op, + pred_op); + } + } else { + auto exec_stream = edge_partition_stream_pool_indices + ? handle.get_stream_from_stream_pool( + (*edge_partition_stream_pool_indices)[0 % stream_pool_size]) + : handle.get_stream(); + + size_t num_keys{}; + if constexpr (use_input_key) { + num_keys = + static_cast(thrust::distance(edge_partition_key_first, edge_partition_key_last)); + } else { + num_keys = static_cast(edge_partition.major_range_size()); + } + + if (num_keys > size_t{0}) { + raft::grid_1d_thread_t update_grid(num_keys, + detail::per_v_transform_reduce_e_kernel_block_size, + handle.get_device_properties().maxGridSize[0]); + std::optional + segment_key_first{}; // std::optional as thrust::transform_iterator's default constructor + // is a deleted function, segment_key_first should always have a value + if constexpr (use_input_key) { + segment_key_first = edge_partition_key_first; + } else { + segment_key_first = thrust::make_counting_iterator(edge_partition.major_range_first()); + } + detail::per_v_transform_reduce_e_low_degree + <<>>( + edge_partition, + *segment_key_first, + *segment_key_first 
+ num_keys, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + output_buffer, + e_op, + major_init, + reduce_op, + pred_op); + } + } +} + +template +void per_v_transform_reduce_e(raft::handle_t const& handle, + GraphViewType const& graph_view, + OptionalKeyIterator sorted_unique_key_first, + OptionalKeyIterator sorted_unique_key_last, + EdgeSrcValueInputWrapper edge_src_value_input, + EdgeDstValueInputWrapper edge_dst_value_input, + EdgeValueInputWrapper edge_value_input, + EdgeOp e_op, + T init, + ReduceOp reduce_op, + PredOp pred_op, + VertexValueOutputIterator vertex_value_output_first) +{ + constexpr bool update_major = (incoming == GraphViewType::is_storage_transposed); + constexpr bool use_input_key = !std::is_same_v; + static_assert(update_major || !use_input_key); + constexpr bool filter_input_key = + GraphViewType::is_multi_gpu && use_input_key && + std::is_same_v>; // if GraphViewType::is_multi_gpu && update_major && + // std::is_same_v>, for any + // vertex in the frontier, we need to visit only local edges + // if we find any valid local edge (FIXME: this is + // applicable even when use_input_key is false). 
+ + static_assert( + ReduceOp::pure_function && + ((reduce_op::has_compatible_raft_comms_op_v && + reduce_op::has_identity_element_v) || + (update_major && + std::is_same_v>))); // current restriction, to support general + // reduction, we may need to take a less + // efficient code path + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using key_t = + typename iterator_value_type_or_default_t::value_type; + + using edge_partition_src_input_device_view_t = std::conditional_t< + std::is_same_v, + detail::edge_partition_endpoint_dummy_property_device_view_t, + detail::edge_partition_endpoint_property_device_view_t< + vertex_t, + typename EdgeSrcValueInputWrapper::value_iterator, + typename EdgeSrcValueInputWrapper::value_type>>; + using edge_partition_dst_input_device_view_t = std::conditional_t< + std::is_same_v, + detail::edge_partition_endpoint_dummy_property_device_view_t, + detail::edge_partition_endpoint_property_device_view_t< + vertex_t, + typename EdgeDstValueInputWrapper::value_iterator, + typename EdgeDstValueInputWrapper::value_type>>; + using edge_partition_e_input_device_view_t = std::conditional_t< + std::is_same_v, + detail::edge_partition_edge_dummy_property_device_view_t, + detail::edge_partition_edge_property_device_view_t< + edge_t, + typename EdgeValueInputWrapper::value_iterator, + typename EdgeValueInputWrapper::value_type>>; + + static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); + + constexpr bool try_bitmap = + GraphViewType::is_multi_gpu && use_input_key && std::is_same_v; + + [[maybe_unused]] constexpr auto max_segments = + detail::num_sparse_segments_per_vertex_partition + size_t{1}; + + // 1. 
drop zero degree keys & compute key_segment_offsets + + auto local_vertex_partition_segment_offsets = graph_view.local_vertex_partition_segment_offsets(); + + std::conditional_t>, std::byte /* dummy */> + key_segment_offsets{}; + auto sorted_unique_nzd_key_last = sorted_unique_key_last; + if constexpr (use_input_key) { + if (local_vertex_partition_segment_offsets) { + key_segment_offsets = compute_key_segment_offsets( + sorted_unique_key_first, + sorted_unique_nzd_key_last, + raft::host_span((*local_vertex_partition_segment_offsets).data(), + (*local_vertex_partition_segment_offsets).size()), + graph_view.local_vertex_partition_range_first(), + handle.get_stream()); + (*key_segment_offsets).back() = *((*key_segment_offsets).rbegin() + 1); + sorted_unique_nzd_key_last = sorted_unique_key_first + (*key_segment_offsets).back(); + } + } + + // 2. initialize vertex value output buffer + + if constexpr (update_major) { // no vertices in the zero degree segment are visited (otherwise, + // no need to initialize) + if constexpr (use_input_key) { + thrust::fill(handle.get_thrust_policy(), + vertex_value_output_first + + thrust::distance(sorted_unique_key_first, sorted_unique_nzd_key_last), + vertex_value_output_first + + thrust::distance(sorted_unique_key_first, sorted_unique_key_last), + init); + } else { + if (local_vertex_partition_segment_offsets) { + thrust::fill( + handle.get_thrust_policy(), + vertex_value_output_first + *((*local_vertex_partition_segment_offsets).rbegin() + 1), + vertex_value_output_first + *((*local_vertex_partition_segment_offsets).rbegin()), + init); + } + } + } else { + if constexpr (GraphViewType::is_multi_gpu) { + /* no need to initialize (we use minor_tmp_buffer) */ + } else { + thrust::fill(handle.get_thrust_policy(), + vertex_value_output_first, + vertex_value_output_first + graph_view.local_vertex_partition_range_size(), + init); + } + } + + // 3. 
filter input keys & update key_segment_offsets + + auto edge_mask_view = graph_view.edge_mask_view(); + + auto tmp_key_buffer = + allocate_optional_dataframe_buffer>( + 0, handle.get_stream()); + auto tmp_output_indices = + allocate_optional_dataframe_buffer>( + 0, handle.get_stream()); + std::conditional_t, + VertexValueOutputIterator> + tmp_vertex_value_output_first{}; + if constexpr (filter_input_key) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(static_cast(minor_comm_rank))); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, static_cast(minor_comm_rank)) + : thrust::nullopt; + + std::optional> edge_partition_stream_pool_indices{std::nullopt}; + if (local_vertex_partition_segment_offsets && (handle.get_stream_pool_size() >= max_segments)) { + edge_partition_stream_pool_indices = std::vector(max_segments); + std::iota((*edge_partition_stream_pool_indices).begin(), + (*edge_partition_stream_pool_indices).end(), + size_t{0}); + } + + if (edge_partition_stream_pool_indices) { handle.sync_stream(); } + + edge_partition_src_input_device_view_t edge_partition_src_value_input{}; + edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; + if constexpr (GraphViewType::is_storage_transposed) { + edge_partition_src_value_input = edge_partition_src_input_device_view_t(edge_src_value_input); + edge_partition_dst_value_input = edge_partition_dst_input_device_view_t( + edge_dst_value_input, static_cast(minor_comm_rank)); + } else { + edge_partition_src_value_input = edge_partition_src_input_device_view_t( + edge_src_value_input, static_cast(minor_comm_rank)); + edge_partition_dst_value_input = edge_partition_dst_input_device_view_t(edge_dst_value_input); + } + auto 
edge_partition_e_value_input = + edge_partition_e_input_device_view_t(edge_value_input, static_cast(minor_comm_rank)); + + per_v_transform_reduce_e_edge_partition( + handle, + edge_partition, + sorted_unique_key_first, + sorted_unique_nzd_key_last, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + vertex_value_output_first, + e_op, + init, + init, + reduce_op, + pred_op, + key_segment_offsets ? std::make_optional>( + (*key_segment_offsets).data(), (*key_segment_offsets).size()) + : std::nullopt, + edge_partition_stream_pool_indices ? std::make_optional>( + (*edge_partition_stream_pool_indices).data(), + (*edge_partition_stream_pool_indices).size()) + : std::nullopt); + + if (edge_partition_stream_pool_indices) { + handle.sync_stream_pool(*edge_partition_stream_pool_indices); + } + + auto num_tmp_keys = thrust::count( + handle.get_thrust_policy(), + vertex_value_output_first, + vertex_value_output_first + + thrust::distance(sorted_unique_key_first, sorted_unique_nzd_key_last), + init); // we allow false positives (some edge operations may actually return init) + + resize_optional_dataframe_buffer(tmp_key_buffer, num_tmp_keys, handle.get_stream()); + resize_optional_dataframe_buffer(tmp_output_indices, num_tmp_keys, handle.get_stream()); + + auto input_first = + thrust::make_zip_iterator(sorted_unique_key_first, thrust::make_counting_iterator(size_t{0})); + thrust::copy_if( + handle.get_thrust_policy(), + input_first, + input_first + thrust::distance(sorted_unique_key_first, sorted_unique_nzd_key_last), + vertex_value_output_first, + thrust::make_zip_iterator(get_optional_dataframe_buffer_begin(tmp_key_buffer), + get_optional_dataframe_buffer_begin(tmp_output_indices)), + is_equal_t{init}); + + sorted_unique_key_first = get_optional_dataframe_buffer_begin(tmp_key_buffer); + sorted_unique_nzd_key_last = get_optional_dataframe_buffer_end(tmp_key_buffer); + tmp_vertex_value_output_first = 
thrust::make_permutation_iterator( + vertex_value_output_first, get_optional_dataframe_buffer_begin(tmp_output_indices)); + + if (key_segment_offsets) { + key_segment_offsets = compute_key_segment_offsets( + sorted_unique_key_first, + sorted_unique_nzd_key_last, + raft::host_span((*local_vertex_partition_segment_offsets).data(), + (*local_vertex_partition_segment_offsets).size()), + edge_partition.major_range_first(), + handle.get_stream()); + assert((*key_segment_offsets).back() == *((*key_segment_offsets).rbegin() + 1)); + assert(sorted_uniue_nzd_key_last == sorted_unique_key_first + (*key_segment_offsets).back()); + } + } else { + tmp_vertex_value_output_first = vertex_value_output_first; + } + + /* 4. compute subgroup_size (used to compute priority in device_gatherv) */ + + [[maybe_unused]] std::conditional_t>, + int, + std::byte /* dummy */> + subgroup_size{}; + if constexpr (GraphViewType::is_multi_gpu && update_major && + std::is_same_v>) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + int num_gpus_per_node{}; + RAFT_CUDA_TRY(cudaGetDeviceCount(&num_gpus_per_node)); + if (comm_size <= num_gpus_per_node) { + subgroup_size = minor_comm_size; + } else { + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + subgroup_size = partition_manager::map_major_comm_to_gpu_row_comm + ? std::max(num_gpus_per_node / major_comm_size, int{1}) + : std::min(minor_comm_size, num_gpus_per_node); + } + } + + // 5. 
collect max_tmp_buffer_size, approx_tmp_buffer_size_per_loop, local_key_list_sizes, + // local_v_list_range_firsts, local_v_list_range_lasts, local_key_list_deg1_sizes, + // key_segment_offset_vectors + + std::conditional_t, std::byte /* dummy */> + max_tmp_buffer_sizes{}; + std::conditional_t, std::byte /* dummy */> + tmp_buffer_size_per_loop_approximations{}; + std::conditional_t, std::byte /* dummy */> + local_key_list_sizes{}; + std::conditional_t, std::byte /* dummy */> + local_v_list_range_firsts{}; + std::conditional_t, std::byte /* dummy */> + local_v_list_range_lasts{}; + std::conditional_t>, std::byte /* dummy */> + local_key_list_deg1_sizes{}; // if global degree is 1, any valid local value should be selected + std::conditional_t>>, + std::byte /* dummy */> + key_segment_offset_vectors{}; + if constexpr (GraphViewType::is_multi_gpu) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + auto const minor_comm_size = minor_comm.get_size(); + + auto max_tmp_buffer_size = + static_cast(static_cast(handle.get_device_properties().totalGlobalMem) * 0.2); + size_t approx_tmp_buffer_size_per_loop{0}; + if constexpr (update_major) { + size_t key_size{0}; + if constexpr (use_input_key) { + if constexpr (std::is_arithmetic_v) { + key_size = sizeof(key_t); + } else { + key_size = sum_thrust_tuple_element_sizes(); + } + } + size_t value_size{0}; + if constexpr (std::is_arithmetic_v) { + value_size = sizeof(T); + } else { + value_size = sum_thrust_tuple_element_sizes(); + } + + size_t major_range_size{}; + if constexpr (use_input_key) { + major_range_size = static_cast( + thrust::distance(sorted_unique_key_first, sorted_unique_nzd_key_last)); + ; + } else { + major_range_size = graph_view.local_vertex_partition_range_size(); + } + size_t size_per_key{}; + if constexpr (filter_input_key) { + size_per_key = + key_size + + value_size / 2; // to reflect that many keys will be 
filtered out, note that this is a + // simple approximation, memory requirement in this case is much more + // complex as we store additional temporary variables + + } else { + size_per_key = key_size + value_size; + } + approx_tmp_buffer_size_per_loop = major_range_size * size_per_key; + } + + size_t num_scalars = 2; // max_tmp_buffer_size, approx_tmp_buffer_size_per_loop + size_t num_scalars_less_key_segment_offsets = num_scalars; + if constexpr (use_input_key) { + num_scalars += 1; // local_key_list_size + if constexpr (try_bitmap) { + num_scalars += 2; // local_key_list_range_first, local_key_list_range_last + } + if (filter_input_key && graph_view.use_dcs()) { + num_scalars += 1; // local_key_list_degree_1_size + } + num_scalars_less_key_segment_offsets = num_scalars; + if (key_segment_offsets) { num_scalars += (*key_segment_offsets).size(); } + } + + rmm::device_uvector d_aggregate_tmps(minor_comm_size * num_scalars, + handle.get_stream()); + auto hypersparse_degree_offsets = + graph_view.local_vertex_partition_hypersparse_degree_offsets(); + thrust::tabulate( + handle.get_thrust_policy(), + d_aggregate_tmps.begin() + num_scalars * minor_comm_rank, + d_aggregate_tmps.begin() + num_scalars * minor_comm_rank + + num_scalars_less_key_segment_offsets, + [max_tmp_buffer_size, + approx_tmp_buffer_size_per_loop, + sorted_unique_key_first, + sorted_unique_nzd_key_last, + deg1_v_first = (filter_input_key && graph_view.use_dcs()) + ? 
thrust::make_optional(graph_view.local_vertex_partition_range_first() + + (*local_vertex_partition_segment_offsets)[3] + + *((*hypersparse_degree_offsets).rbegin() + 1)) + : thrust::nullopt, + vertex_partition_range_first = + graph_view.local_vertex_partition_range_first()] __device__(size_t i) { + if (i == 0) { + return max_tmp_buffer_size; + } else if (i == 1) { + return approx_tmp_buffer_size_per_loop; + } + if constexpr (use_input_key) { + auto v_list_size = static_cast( + thrust::distance(sorted_unique_key_first, sorted_unique_nzd_key_last)); + if (i == 2) { return v_list_size; } + if constexpr (try_bitmap) { + if (i == 3) { + vertex_t first{}; + if (v_list_size > 0) { + first = *sorted_unique_key_first; + } else { + first = vertex_partition_range_first; + } + assert(static_cast(static_cast(first)) == first); + return static_cast(first); + } else if (i == 4) { + vertex_t last{}; + if (v_list_size > 0) { + last = *(sorted_unique_key_first + (v_list_size - 1)) + 1; + } else { + last = vertex_partition_range_first; + } + assert(static_cast(static_cast(last)) == last); + return static_cast(last); + } else if (i == 5) { + if (deg1_v_first) { + auto sorted_unique_v_first = thrust::make_transform_iterator( + sorted_unique_key_first, + cuda::proclaim_return_type([] __device__(auto key) { + return thrust_tuple_get_or_identity(key); + })); + return v_list_size - static_cast(thrust::distance( + sorted_unique_v_first, + thrust::lower_bound(thrust::seq, + sorted_unique_v_first, + sorted_unique_v_first + v_list_size, + deg1_v_first))); + } + } + } else { + if (i == 3) { + if (deg1_v_first) { + auto sorted_unique_v_first = thrust::make_transform_iterator( + sorted_unique_key_first, + cuda::proclaim_return_type([] __device__(auto key) { + return thrust_tuple_get_or_identity(key); + })); + return v_list_size - static_cast(thrust::distance( + sorted_unique_v_first, + thrust::lower_bound(thrust::seq, + sorted_unique_v_first, + sorted_unique_v_first + v_list_size, + 
deg1_v_first))); + } + } + } + } + assert(false); + return size_t{0}; + }); + if constexpr (use_input_key) { + if (key_segment_offsets) { + raft::update_device(d_aggregate_tmps.data() + (num_scalars * minor_comm_rank + + num_scalars_less_key_segment_offsets), + (*key_segment_offsets).data(), + (*key_segment_offsets).size(), + handle.get_stream()); + } + } + + if (minor_comm_size > 1) { + device_allgather(minor_comm, + d_aggregate_tmps.data() + minor_comm_rank * num_scalars, + d_aggregate_tmps.data(), + num_scalars, + handle.get_stream()); + } + + std::vector h_aggregate_tmps(d_aggregate_tmps.size()); + raft::update_host(h_aggregate_tmps.data(), + d_aggregate_tmps.data(), + d_aggregate_tmps.size(), + handle.get_stream()); + handle.sync_stream(); + max_tmp_buffer_sizes = std::vector(minor_comm_size); + tmp_buffer_size_per_loop_approximations = std::vector(minor_comm_size); + if constexpr (use_input_key) { + local_key_list_sizes = std::vector(minor_comm_size); + if constexpr (try_bitmap) { + local_v_list_range_firsts = std::vector(minor_comm_size); + local_v_list_range_lasts = std::vector(minor_comm_size); + } + if constexpr (filter_input_key) { + if (graph_view.use_dcs()) { + local_key_list_deg1_sizes = std::vector(minor_comm_size); + } + } + if (key_segment_offsets) { + key_segment_offset_vectors = std::vector>{}; + (*key_segment_offset_vectors).reserve(minor_comm_size); + } + } + for (int i = 0; i < minor_comm_size; ++i) { + max_tmp_buffer_sizes[i] = h_aggregate_tmps[i * num_scalars]; + tmp_buffer_size_per_loop_approximations[i] = h_aggregate_tmps[i * num_scalars + 1]; + if constexpr (use_input_key) { + local_key_list_sizes[i] = h_aggregate_tmps[i * num_scalars + 2]; + if constexpr (try_bitmap) { + local_v_list_range_firsts[i] = + static_cast(h_aggregate_tmps[i * num_scalars + 3]); + local_v_list_range_lasts[i] = + static_cast(h_aggregate_tmps[i * num_scalars + 4]); + } + if constexpr (filter_input_key) { + if (graph_view.use_dcs()) { + 
(*local_key_list_deg1_sizes)[i] = + static_cast(h_aggregate_tmps[i * num_scalars + (try_bitmap ? 5 : 3)]); + } + } + if (key_segment_offsets) { + (*key_segment_offset_vectors) + .emplace_back( + h_aggregate_tmps.begin() + i * num_scalars + num_scalars_less_key_segment_offsets, + h_aggregate_tmps.begin() + i * num_scalars + num_scalars_less_key_segment_offsets + + (*key_segment_offsets).size()); + } + } + } + } else { + if constexpr (use_input_key) { + local_key_list_sizes = std::vector{ + static_cast(thrust::distance(sorted_unique_key_first, sorted_unique_nzd_key_last))}; + if (key_segment_offsets) { + key_segment_offset_vectors = std::vector>(1); + (*key_segment_offset_vectors)[0] = *key_segment_offsets; + } + } + } + + // 6. compute optional bitmap info & compressed vertex list + + bool v_compressible{false}; + std:: + conditional_t>, std::byte /* dummy */> + v_list_bitmap{}; + std:: + conditional_t>, std::byte /* dummy */> + compressed_v_list{}; + if constexpr (try_bitmap) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + if (minor_comm_size > 1) { + auto const minor_comm_rank = minor_comm.get_rank(); + + if constexpr (sizeof(vertex_t) == 8) { + vertex_t local_v_list_max_range_size{0}; + for (int i = 0; i < minor_comm_size; ++i) { + auto range_size = local_v_list_range_lasts[i] - local_v_list_range_firsts[i]; + local_v_list_max_range_size = std::max(range_size, local_v_list_max_range_size); + } + if (local_v_list_max_range_size <= + std::numeric_limits::max()) { // broadcast 32bit offset values instead of 64 + // bit vertex IDs + v_compressible = true; + } + } + + double avg_fill_ratio{0.0}; + for (int i = 0; i < minor_comm_size; ++i) { + auto num_keys = static_cast(local_key_list_sizes[i]); + auto range_size = local_v_list_range_lasts[i] - local_v_list_range_firsts[i]; + avg_fill_ratio += + (range_size > 0) ? 
(num_keys / static_cast(range_size)) : double{0.0}; + } + avg_fill_ratio /= static_cast(minor_comm_size); + double threshold_ratio = + 2.0 /* tuning parameter (consider that we need to reprodce vertex list from bitmap)*/ / + static_cast((v_compressible ? sizeof(uint32_t) : sizeof(vertex_t)) * 8); + auto avg_key_list_size = + std::reduce(local_key_list_sizes.begin(), local_key_list_sizes.end()) / + static_cast(minor_comm_size); + + if ((avg_fill_ratio > threshold_ratio) && + (static_cast(avg_key_list_size) > + packed_bools_per_word() * + 32 /* tuning parameter, to considerr additional kernel launch overhead */)) { + v_list_bitmap = compute_vertex_list_bitmap_info(sorted_unique_key_first, + sorted_unique_nzd_key_last, + local_v_list_range_firsts[minor_comm_rank], + local_v_list_range_lasts[minor_comm_rank], + handle.get_stream()); + } else if (v_compressible) { + rmm::device_uvector tmps(local_key_list_sizes[minor_comm_rank], + handle.get_stream()); + thrust::transform(handle.get_thrust_policy(), + sorted_unique_key_first, + sorted_unique_nzd_key_last, + tmps.begin(), + cuda::proclaim_return_type( + [range_first = local_v_list_range_firsts[minor_comm_rank]] __device__( + auto v) { return static_cast(v - range_first); })); + compressed_v_list = std::move(tmps); + } + } + } + + bool uint32_key_output_offset = false; + if constexpr (GraphViewType::is_multi_gpu && update_major && + std::is_same_v>) { + size_t max_key_offset_size = std::numeric_limits::max(); + if constexpr (filter_input_key) { + max_key_offset_size = std::reduce( + local_key_list_sizes.begin(), local_key_list_sizes.end(), size_t{0}, [](auto l, auto r) { + return std::max(l, r); + }); + } else { + static_assert(!use_input_key); + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(i)); + auto const& segment_offsets = graph_view.local_edge_partition_segment_offsets(i); + + auto 
output_range_size = + segment_offsets ? *((*segment_offsets).rbegin() + 1) /* exclude the zero degree segment */ + : edge_partition.major_range_size(); + + max_key_offset_size = std::max(static_cast(output_range_size), max_key_offset_size); + } + } + uint32_key_output_offset = + (max_key_offset_size <= static_cast(std::numeric_limits::max())); + } + + // 7. set-up stream pool & events + + std::optional> stream_pool_indices{std::nullopt}; + if constexpr (GraphViewType::is_multi_gpu) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + auto max_tmp_buffer_size = + std::reduce(max_tmp_buffer_sizes.begin(), max_tmp_buffer_sizes.end()) / + static_cast(minor_comm_size); + auto approx_tmp_buffer_size_per_loop = + std::reduce(tmp_buffer_size_per_loop_approximations.begin(), + tmp_buffer_size_per_loop_approximations.end()) / + static_cast(minor_comm_size); + size_t num_streams_per_loop{1}; + if (local_vertex_partition_segment_offsets && (handle.get_stream_pool_size() >= max_segments)) { + num_streams_per_loop = std::max( + std::min(size_t{8} / graph_view.number_of_local_edge_partitions(), max_segments), + size_t{ + 1}); // Note that "CUDA_DEVICE_MAX_CONNECTIONS (default: 8, can be set to [1, 32])" sets + // the number of queues, if the total number of streams exceeds this number, jobs on + // different streams can be sent to one queue leading to false dependency. Setting + // num_concurrent_loops above the number of queues has some benefits in NCCL + // communications but creating too many streams just for compute may not help. 
+ } + stream_pool_indices = init_stream_pool_indices(max_tmp_buffer_size, + approx_tmp_buffer_size_per_loop, + graph_view.number_of_local_edge_partitions(), + num_streams_per_loop, + handle.get_stream_pool_size()); + if ((*stream_pool_indices).size() <= 1) { stream_pool_indices = std::nullopt; } + } + + size_t num_concurrent_loops{1}; + std::optional> loop_stream_pool_indices{ + std::nullopt}; // first num_concurrent_loops streams from stream_pool_indices + if (stream_pool_indices) { + num_concurrent_loops = + std::min(graph_view.number_of_local_edge_partitions(), (*stream_pool_indices).size()); + loop_stream_pool_indices = std::vector(num_concurrent_loops); + std::iota((*loop_stream_pool_indices).begin(), (*loop_stream_pool_indices).end(), size_t{0}); + } + + // 8. set-up temporary buffers + + using minor_tmp_buffer_type = std::conditional_t, + edge_dst_property_t>; + [[maybe_unused]] std::unique_ptr minor_tmp_buffer{}; + if constexpr (GraphViewType::is_multi_gpu && !update_major) { + minor_tmp_buffer = std::make_unique(handle, graph_view); + auto minor_init = init; + auto view = minor_tmp_buffer->view(); + if (view.keys()) { // defer applying the initial value to the end as minor_tmp_buffer ma not + // store values for the entire minor rangey + minor_init = ReduceOp::identity_element; + } else { + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_rank = major_comm.get_rank(); + minor_init = (major_comm_rank == 0) ? 
init : ReduceOp::identity_element; + } + fill_edge_minor_property(handle, graph_view, minor_tmp_buffer->mutable_view(), minor_init); + } + + using edge_partition_minor_output_device_view_t = + std::conditional_tmutable_view().value_first())>, + void /* dummy */>; + + auto counters = allocate_optional_dataframe_buffer< + std::conditional_t>( + num_concurrent_loops, handle.get_stream()); + + if constexpr (!GraphViewType::is_multi_gpu || !use_input_key) { + if (loop_stream_pool_indices) { handle.sync_stream(); } + } + + // 9. process local edge partitions + + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); i += num_concurrent_loops) { + auto loop_count = + std::min(num_concurrent_loops, graph_view.number_of_local_edge_partitions() - i); + + std::conditional_t< + GraphViewType::is_multi_gpu && use_input_key, + std::conditional_t< + try_bitmap, + std::vector, rmm::device_uvector>>, + std::vector>>, + std::byte /* dummy */> + edge_partition_key_buffers{}; + std::conditional_t, rmm::device_uvector>>>, + std::byte /* dummy */> + edge_partition_hypersparse_key_offset_vectors{}; // drop zero local degree keys in th + // hypersparse regione + std::conditional_t>, std::byte /* dummy */> + edge_partition_deg1_hypersparse_key_offset_counts{}; + std::vector process_local_edges(loop_count, true); + + if constexpr (GraphViewType::is_multi_gpu && use_input_key) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + auto const minor_comm_rank = minor_comm.get_rank(); + + edge_partition_key_buffers.reserve(loop_count); + + std::conditional_t>>, + std::byte /* dummy */> + edge_partition_bitmap_buffers{std::nullopt}; + if constexpr (try_bitmap) { + if (v_list_bitmap) { + edge_partition_bitmap_buffers = std::vector>{}; + (*edge_partition_bitmap_buffers).reserve(loop_count); + } + } + + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + + bool 
use_bitmap_buffer = false; + if constexpr (try_bitmap) { + if (edge_partition_bitmap_buffers) { + (*edge_partition_bitmap_buffers) + .emplace_back(packed_bool_size(local_v_list_range_lasts[partition_idx] - + local_v_list_range_firsts[partition_idx]), + handle.get_stream()); + use_bitmap_buffer = true; + } + } + if (!use_bitmap_buffer) { + bool allocated{false}; + if constexpr (try_bitmap) { + if (v_compressible) { + edge_partition_key_buffers.push_back(rmm::device_uvector( + local_key_list_sizes[partition_idx], handle.get_stream())); + allocated = true; + } + } + if (!allocated) { + edge_partition_key_buffers.push_back(allocate_dataframe_buffer( + local_key_list_sizes[partition_idx], handle.get_stream())); + } + } + + if constexpr (filter_input_key) { + if (static_cast(partition_idx) == minor_comm_rank) { + process_local_edges[j] = false; + } + } + } + + device_group_start(minor_comm); + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + if constexpr (try_bitmap) { + if (v_list_bitmap) { + device_bcast(minor_comm, + (*v_list_bitmap).data(), + get_dataframe_buffer_begin((*edge_partition_bitmap_buffers)[j]), + size_dataframe_buffer((*edge_partition_bitmap_buffers)[j]), + static_cast(partition_idx), + handle.get_stream()); + } else if (compressed_v_list) { + device_bcast(minor_comm, + (*compressed_v_list).data(), + get_dataframe_buffer_begin(std::get<0>(edge_partition_key_buffers[j])), + local_key_list_sizes[partition_idx], + static_cast(partition_idx), + handle.get_stream()); + } else { + device_bcast(minor_comm, + sorted_unique_key_first, + get_dataframe_buffer_begin(std::get<1>(edge_partition_key_buffers[j])), + local_key_list_sizes[partition_idx], + static_cast(partition_idx), + handle.get_stream()); + } + } else { + device_bcast(minor_comm, + sorted_unique_key_first, + get_dataframe_buffer_begin(edge_partition_key_buffers[j]), + local_key_list_sizes[partition_idx], + static_cast(partition_idx), + handle.get_stream()); + } + } + 
device_group_end(minor_comm); + if (loop_stream_pool_indices) { handle.sync_stream(); } + + if constexpr (try_bitmap) { + if (edge_partition_bitmap_buffers) { + // copy keys from temporary bitmap buffers to key buffers (copy only the sparse segments + // if filter_input_key is true) + + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = + loop_stream_pool_indices + ? handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + std::variant, rmm::device_uvector> keys = + rmm::device_uvector(0, loop_stream); + if (v_compressible) { + std::get<0>(keys).resize( + process_local_edges[j] ? local_key_list_sizes[partition_idx] : size_t{0}, + loop_stream); + } else { + keys = rmm::device_uvector( + process_local_edges[j] ? local_key_list_sizes[partition_idx] : size_t{0}, + loop_stream); + } + + auto& rx_bitmap = (*edge_partition_bitmap_buffers)[j]; + if (process_local_edges[j]) { + auto range_first = local_v_list_range_firsts[partition_idx]; + auto range_last = local_v_list_range_lasts[partition_idx]; + if constexpr (filter_input_key) { + if (graph_view.use_dcs()) { // skip copying the hypersparse segment + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(partition_idx)); + range_last = std::min(range_last, *(edge_partition.major_hypersparse_first())); + } + } + if (range_first < range_last) { + if (keys.index() == 0) { + retrieve_vertex_list_from_bitmap( + raft::device_span(rx_bitmap.data(), rx_bitmap.size()), + get_dataframe_buffer_begin(std::get<0>(keys)), + raft::device_span( + counters.data() + j, + size_t{1}), // dummy, we already know the counts (i.e. 
+ // (*key_segment_offset_vectors)[partition_idx][3]) + uint32_t{0}, + static_cast(range_last - range_first), + loop_stream); + } else { + retrieve_vertex_list_from_bitmap( + raft::device_span(rx_bitmap.data(), rx_bitmap.size()), + get_dataframe_buffer_begin(std::get<1>(keys)), + raft::device_span( + counters.data() + j, + size_t{1}), // dummy, we already know the counts (i.e. + // (*key_segment_offset_vectors)[partition_idx][3]) + range_first, + range_last, + loop_stream); + } + } + } else { + rx_bitmap.resize(0, loop_stream); + rx_bitmap.shrink_to_fit(loop_stream); + } + edge_partition_key_buffers.push_back(std::move(keys)); + } + } + } + + if constexpr (filter_input_key) { + if (graph_view.use_dcs()) { + edge_partition_hypersparse_key_offset_vectors = + std::vector, rmm::device_uvector>>{}; + (*edge_partition_hypersparse_key_offset_vectors).reserve(loop_count); + edge_partition_deg1_hypersparse_key_offset_counts = std::vector(loop_count, 0); + + std::conditional_t, + rmm::device_uvector>>, + std::vector>>>, + std::byte /* dummy */> + edge_partition_new_key_buffers{}; + bool allocate_new_key_buffer{true}; + if constexpr (try_bitmap) { + if (edge_partition_bitmap_buffers) { allocate_new_key_buffer = false; } + } + if (allocate_new_key_buffer) { // allocate new key buffers and copy the sparse segment + // keys to the new key buffers + if constexpr (try_bitmap) { + edge_partition_new_key_buffers = std::vector< + std::variant, rmm::device_uvector>>{}; + } else { + edge_partition_new_key_buffers = std::vector>{}; + } + (*edge_partition_new_key_buffers).reserve(loop_count); + + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = + loop_stream_pool_indices + ? 
handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + auto const& key_segment_offsets = (*key_segment_offset_vectors)[partition_idx]; + + if constexpr (try_bitmap) { + if (v_compressible) { + auto new_key_buffer = rmm::device_uvector( + process_local_edges[j] ? local_key_list_sizes[partition_idx] : size_t{0}, + loop_stream); + if (process_local_edges[j]) { + thrust::copy( + rmm::exec_policy_nosync(loop_stream), + get_dataframe_buffer_begin(std::get<0>(edge_partition_key_buffers[j])), + get_dataframe_buffer_begin(std::get<0>(edge_partition_key_buffers[j])) + + key_segment_offsets[3], + get_dataframe_buffer_begin(new_key_buffer)); + } else { + std::get<0>(edge_partition_key_buffers[j]).resize(0, loop_stream); + std::get<0>(edge_partition_key_buffers[j]).shrink_to_fit(loop_stream); + } + (*edge_partition_new_key_buffers).push_back(std::move(new_key_buffer)); + } else { + auto new_key_buffer = rmm::device_uvector( + process_local_edges[j] ? local_key_list_sizes[partition_idx] : size_t{0}, + loop_stream); + if (process_local_edges[j]) { + thrust::copy( + rmm::exec_policy_nosync(loop_stream), + get_dataframe_buffer_begin(std::get<1>(edge_partition_key_buffers[j])), + get_dataframe_buffer_begin(std::get<1>(edge_partition_key_buffers[j])) + + key_segment_offsets[3], + get_dataframe_buffer_begin(new_key_buffer)); + } else { + std::get<1>(edge_partition_key_buffers[j]).resize(0, loop_stream); + std::get<1>(edge_partition_key_buffers[j]).shrink_to_fit(loop_stream); + } + (*edge_partition_new_key_buffers).push_back(std::move(new_key_buffer)); + } + } else { + auto new_key_buffer = allocate_dataframe_buffer( + process_local_edges[j] ? 
local_key_list_sizes[partition_idx] : size_t{0}, + loop_stream); + if (process_local_edges[j]) { + thrust::copy(rmm::exec_policy_nosync(loop_stream), + get_dataframe_buffer_begin(edge_partition_key_buffers[j]), + get_dataframe_buffer_begin(edge_partition_key_buffers[j]) + + key_segment_offsets[3], + get_dataframe_buffer_begin(new_key_buffer)); + } else { + edge_partition_key_buffers[j].resize(0, loop_stream); + edge_partition_key_buffers[j].shrink_to_fit(loop_stream); + } + (*edge_partition_new_key_buffers).push_back(std::move(new_key_buffer)); + } + } + } + + if constexpr (try_bitmap) { // if we are using a bitmap buffer + if (v_list_bitmap) { + std::vector> input_count_offset_vectors{}; + input_count_offset_vectors.reserve(loop_count); + + std::vector> filtered_bitmap_vectors{}; + std::vector> output_count_offset_vectors{}; + filtered_bitmap_vectors.reserve(loop_count); + output_count_offset_vectors.reserve(loop_count); + + std::vector range_offset_firsts(loop_count, 0); + std::vector range_offset_lasts(loop_count, 0); + + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = + loop_stream_pool_indices + ? handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + rmm::device_uvector input_count_offsets(0, loop_stream); + if (process_local_edges[j]) { + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(partition_idx)); + auto const& segment_offsets = + graph_view.local_edge_partition_segment_offsets(partition_idx); + + auto range_offset_first = + std::min((edge_partition.major_range_first() + (*segment_offsets)[3] > + local_v_list_range_firsts[partition_idx]) + ? 
((edge_partition.major_range_first() + (*segment_offsets)[3]) - + local_v_list_range_firsts[partition_idx]) + : vertex_t{0}, + local_v_list_range_lasts[partition_idx] - + local_v_list_range_firsts[partition_idx]); + auto range_offset_last = + std::min(((edge_partition.major_range_first() + (*segment_offsets)[4]) > + local_v_list_range_firsts[partition_idx]) + ? ((edge_partition.major_range_first() + (*segment_offsets)[4]) - + local_v_list_range_firsts[partition_idx]) + : vertex_t{0}, + local_v_list_range_lasts[partition_idx] - + local_v_list_range_firsts[partition_idx]); + if (range_offset_first < range_offset_last) { + auto const& rx_bitmap = (*edge_partition_bitmap_buffers)[j]; + auto input_count_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(packed_bool_offset(range_offset_first)), + cuda::proclaim_return_type( + [range_bitmap = + raft::device_span(rx_bitmap.data(), rx_bitmap.size()), + range_offset_first] __device__(size_t i) { + auto word = range_bitmap[i]; + if (i == packed_bool_offset(range_offset_first)) { + word &= ~packed_bool_partial_mask( + range_offset_first % + packed_bools_per_word()); // clear the bits in the sparse region + } + return static_cast(__popc(word)); + })); + input_count_offsets.resize( + (rx_bitmap.size() - packed_bool_offset(range_offset_first)) + 1, loop_stream); + input_count_offsets.set_element_to_zero_async(0, loop_stream); + thrust::inclusive_scan( + rmm::exec_policy_nosync(loop_stream), + input_count_first, + input_count_first + + (rx_bitmap.size() - packed_bool_offset(range_offset_first)), + input_count_offsets.begin() + 1); + } + range_offset_firsts[j] = range_offset_first; + range_offset_lasts[j] = range_offset_last; + } + input_count_offset_vectors.push_back(std::move(input_count_offsets)); + } + + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = + loop_stream_pool_indices + ? 
handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + rmm::device_uvector filtered_bitmap(0, loop_stream); + rmm::device_uvector output_count_offsets(0, loop_stream); + if (process_local_edges[j]) { + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(partition_idx)); + + auto segment_bitmap = *(edge_partition.dcs_nzd_range_bitmap()); + + auto range_offset_first = range_offset_firsts[j]; + auto range_offset_last = range_offset_lasts[j]; + if (range_offset_first < range_offset_last) { + auto const& rx_bitmap = (*edge_partition_bitmap_buffers)[j]; + filtered_bitmap.resize( + rx_bitmap.size() - packed_bool_offset(range_offset_first), loop_stream); + thrust::tabulate( + rmm::exec_policy_nosync(loop_stream), + filtered_bitmap.begin(), + filtered_bitmap.end(), + cuda::proclaim_return_type( + [range_bitmap = + raft::device_span(rx_bitmap.data(), rx_bitmap.size()), + segment_bitmap = raft::device_span(segment_bitmap.data(), + segment_bitmap.size()), + range_first = local_v_list_range_firsts[partition_idx], + range_offset_first, + range_offset_last, + major_hypersparse_first = + *(edge_partition.major_hypersparse_first())] __device__(size_t i) { + auto this_word_range_offset_first = cuda::std::max( + static_cast((packed_bool_offset(range_offset_first) + i) * + packed_bools_per_word()), + range_offset_first); + auto this_word_range_offset_last = + cuda::std::min(static_cast( + (packed_bool_offset(range_offset_first) + (i + 1)) * + packed_bools_per_word()), + range_offset_last); + auto range_lead_bits = static_cast(this_word_range_offset_first % + packed_bools_per_word()); + auto range_bitmap_word = + range_bitmap[packed_bool_offset(range_offset_first) + i]; + if (i == 0) { // clear the bits in the sparse region + range_bitmap_word &= ~packed_bool_partial_mask(range_offset_first % + packed_bools_per_word()); + } + auto this_word_hypersparse_offset_first = + (range_first + 
this_word_range_offset_first) - major_hypersparse_first; + auto num_bits = static_cast(this_word_range_offset_last - + this_word_range_offset_first); + auto hypersparse_lead_bits = + static_cast(this_word_hypersparse_offset_first) % + packed_bools_per_word(); + auto segment_bitmap_word = ((segment_bitmap[packed_bool_offset( + this_word_hypersparse_offset_first)] >> + hypersparse_lead_bits)) + << range_lead_bits; + auto remaining_bits = + (num_bits > (packed_bools_per_word() - hypersparse_lead_bits)) + ? (num_bits - (packed_bools_per_word() - hypersparse_lead_bits)) + : size_t{0}; + if (remaining_bits > 0) { + segment_bitmap_word |= + ((segment_bitmap + [packed_bool_offset(this_word_hypersparse_offset_first) + 1] & + packed_bool_partial_mask(remaining_bits)) + << ((packed_bools_per_word() - hypersparse_lead_bits) + + range_lead_bits)); + } + return range_bitmap_word & segment_bitmap_word; + })); + auto output_count_first = thrust::make_transform_iterator( + filtered_bitmap.begin(), + cuda::proclaim_return_type([] __device__(uint32_t word) { + return static_cast(__popc(word)); + })); + output_count_offsets.resize(filtered_bitmap.size() + 1, loop_stream); + output_count_offsets.set_element_to_zero_async(0, loop_stream); + thrust::inclusive_scan(rmm::exec_policy_nosync(loop_stream), + output_count_first, + output_count_first + filtered_bitmap.size(), + output_count_offsets.begin() + 1); + } + } + filtered_bitmap_vectors.push_back(std::move(filtered_bitmap)); + output_count_offset_vectors.push_back(std::move(output_count_offsets)); + } + + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = + loop_stream_pool_indices + ? 
handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + auto const& key_segment_offsets = (*key_segment_offset_vectors)[partition_idx]; + + auto& keys = edge_partition_key_buffers[j]; + std::variant, rmm::device_uvector> offsets = + rmm::device_uvector(0, loop_stream); + if (uint32_key_output_offset) { + std::get<0>(offsets).resize(process_local_edges[j] + ? (key_segment_offsets[4] - key_segment_offsets[3]) + : vertex_t{0}, + loop_stream); + } else { + offsets = rmm::device_uvector( + process_local_edges[j] ? (key_segment_offsets[4] - key_segment_offsets[3]) + : vertex_t{0}, + loop_stream); + } + + if (process_local_edges[j]) { + auto range_offset_first = range_offset_firsts[j]; + auto range_offset_last = range_offset_lasts[j]; + if (range_offset_first < range_offset_last) { + auto const& rx_bitmap = (*edge_partition_bitmap_buffers)[j]; + auto const& input_count_offsets = input_count_offset_vectors[j]; + auto const& filtered_bitmap = filtered_bitmap_vectors[j]; + auto const& output_count_offsets = output_count_offset_vectors[j]; + + if (keys.index() == 0) { + if (offsets.index() == 0) { + thrust::for_each( + rmm::exec_policy_nosync(loop_stream), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(filtered_bitmap.size()), + [range_bitmap = + raft::device_span(rx_bitmap.data(), rx_bitmap.size()), + filtered_bitmap = raft::device_span( + filtered_bitmap.data(), filtered_bitmap.size()), + input_count_offsets = raft::device_span( + input_count_offsets.data(), input_count_offsets.size()), + output_count_offsets = raft::device_span( + output_count_offsets.data(), output_count_offsets.size()), + output_key_first = + get_dataframe_buffer_begin(std::get<0>(keys)) + key_segment_offsets[3], + output_offset_first = std::get<0>(offsets).begin(), + range_offset_first, + start_key_offset = key_segment_offsets[3]] __device__(size_t i) { + auto range_bitmap_word = + range_bitmap[packed_bool_offset(range_offset_first) 
+ i]; + if (i == 0) { // clear the bits in the sparse region + range_bitmap_word &= ~packed_bool_partial_mask( + range_offset_first % packed_bools_per_word()); + } + auto filtered_bitmap_word = filtered_bitmap[i]; + auto lead_bits = (i == 0) + ? static_cast(range_offset_first % + packed_bools_per_word()) + : static_cast(0); + auto this_word_start_v_offset = + static_cast((packed_bool_offset(range_offset_first) + i) * + packed_bools_per_word()); + auto this_word_start_key_offset = + static_cast(start_key_offset + input_count_offsets[i]); + auto this_word_output_start_offset = output_count_offsets[i]; + for (int j = 0; j < __popc(filtered_bitmap_word); ++j) { + auto jth_set_bit_pos = static_cast( + __fns(filtered_bitmap_word, lead_bits, j + 1)); + *(output_key_first + (this_word_output_start_offset + j)) = + this_word_start_v_offset + jth_set_bit_pos; + *(output_offset_first + (this_word_output_start_offset + j)) = + this_word_start_key_offset + + static_cast(__popc( + range_bitmap_word & packed_bool_partial_mask(jth_set_bit_pos))); + } + }); + } else { + thrust::for_each( + rmm::exec_policy_nosync(loop_stream), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(filtered_bitmap.size()), + [range_bitmap = + raft::device_span(rx_bitmap.data(), rx_bitmap.size()), + filtered_bitmap = raft::device_span( + filtered_bitmap.data(), filtered_bitmap.size()), + input_count_offsets = raft::device_span( + input_count_offsets.data(), input_count_offsets.size()), + output_count_offsets = raft::device_span( + output_count_offsets.data(), output_count_offsets.size()), + output_key_first = + get_dataframe_buffer_begin(std::get<0>(keys)) + key_segment_offsets[3], + output_offset_first = std::get<1>(offsets).begin(), + range_offset_first, + start_key_offset = key_segment_offsets[3]] __device__(size_t i) { + auto range_bitmap_word = + range_bitmap[packed_bool_offset(range_offset_first) + i]; + if (i == 0) { // clear the bits in the sparse region + 
range_bitmap_word &= ~packed_bool_partial_mask( + range_offset_first % packed_bools_per_word()); + } + auto filtered_bitmap_word = filtered_bitmap[i]; + auto lead_bits = (i == 0) + ? static_cast(range_offset_first % + packed_bools_per_word()) + : static_cast(0); + auto this_word_start_v_offset = + static_cast((packed_bool_offset(range_offset_first) + i) * + packed_bools_per_word()); + auto this_word_start_key_offset = + static_cast(start_key_offset + input_count_offsets[i]); + auto this_word_output_start_offset = output_count_offsets[i]; + for (int j = 0; j < __popc(filtered_bitmap_word); ++j) { + auto jth_set_bit_pos = static_cast( + __fns(filtered_bitmap_word, lead_bits, j + 1)); + *(output_key_first + (this_word_output_start_offset + j)) = + this_word_start_v_offset + jth_set_bit_pos; + *(output_offset_first + (this_word_output_start_offset + j)) = + this_word_start_key_offset + + static_cast(__popc( + range_bitmap_word & packed_bool_partial_mask(jth_set_bit_pos))); + } + }); + } + } else { + if (offsets.index() == 0) { + thrust::for_each( + rmm::exec_policy_nosync(loop_stream), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(filtered_bitmap.size()), + [range_bitmap = + raft::device_span(rx_bitmap.data(), rx_bitmap.size()), + filtered_bitmap = raft::device_span( + filtered_bitmap.data(), filtered_bitmap.size()), + input_count_offsets = raft::device_span( + input_count_offsets.data(), input_count_offsets.size()), + output_count_offsets = raft::device_span( + output_count_offsets.data(), output_count_offsets.size()), + output_key_first = + get_dataframe_buffer_begin(std::get<0>(keys)) + key_segment_offsets[3], + output_offset_first = std::get<0>(offsets).begin(), + range_first = local_v_list_range_firsts[partition_idx], + range_offset_first, + start_key_offset = key_segment_offsets[3]] __device__(size_t i) { + auto range_bitmap_word = + range_bitmap[packed_bool_offset(range_offset_first) + i]; + if (i == 0) { // clear the bits in the 
sparse region + range_bitmap_word &= ~packed_bool_partial_mask( + range_offset_first % packed_bools_per_word()); + } + auto filtered_bitmap_word = filtered_bitmap[i]; + auto lead_bits = (i == 0) + ? static_cast(range_offset_first % + packed_bools_per_word()) + : static_cast(0); + auto this_word_start_v = + range_first + + static_cast((packed_bool_offset(range_offset_first) + i) * + packed_bools_per_word()); + auto this_word_start_key_offset = + static_cast(start_key_offset + input_count_offsets[i]); + auto this_word_output_start_offset = output_count_offsets[i]; + for (int j = 0; j < __popc(filtered_bitmap_word); ++j) { + auto jth_set_bit_pos = static_cast( + __fns(filtered_bitmap_word, lead_bits, j + 1)); + *(output_key_first + (this_word_output_start_offset + j)) = + this_word_start_v + jth_set_bit_pos; + *(output_offset_first + (this_word_output_start_offset + j)) = + this_word_start_key_offset + + static_cast(__popc( + range_bitmap_word & packed_bool_partial_mask(jth_set_bit_pos))); + } + }); + } else { + thrust::for_each( + rmm::exec_policy_nosync(loop_stream), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(filtered_bitmap.size()), + [range_bitmap = + raft::device_span(rx_bitmap.data(), rx_bitmap.size()), + filtered_bitmap = raft::device_span( + filtered_bitmap.data(), filtered_bitmap.size()), + input_count_offsets = raft::device_span( + input_count_offsets.data(), input_count_offsets.size()), + output_count_offsets = raft::device_span( + output_count_offsets.data(), output_count_offsets.size()), + output_key_first = + get_dataframe_buffer_begin(std::get<0>(keys)) + key_segment_offsets[3], + output_offset_first = std::get<1>(offsets).begin(), + range_first = local_v_list_range_firsts[partition_idx], + range_offset_first, + start_key_offset = key_segment_offsets[3]] __device__(size_t i) { + auto range_bitmap_word = + range_bitmap[packed_bool_offset(range_offset_first) + i]; + if (i == 0) { // clear the bits in the sparse region + 
range_bitmap_word &= ~packed_bool_partial_mask( + range_offset_first % packed_bools_per_word()); + } + auto filtered_bitmap_word = filtered_bitmap[i]; + auto lead_bits = (i == 0) + ? static_cast(range_offset_first % + packed_bools_per_word()) + : static_cast(0); + auto this_word_start_v = + range_first + + static_cast((packed_bool_offset(range_offset_first) + i) * + packed_bools_per_word()); + auto this_word_start_key_offset = + static_cast(start_key_offset + input_count_offsets[i]); + auto this_word_output_start_offset = output_count_offsets[i]; + for (int j = 0; j < __popc(filtered_bitmap_word); ++j) { + auto jth_set_bit_pos = static_cast( + __fns(filtered_bitmap_word, lead_bits, j + 1)); + *(output_key_first + (this_word_output_start_offset + j)) = + this_word_start_v + jth_set_bit_pos; + *(output_offset_first + (this_word_output_start_offset + j)) = + this_word_start_key_offset + + static_cast(__popc( + range_bitmap_word & packed_bool_partial_mask(jth_set_bit_pos))); + } + }); + } + } + thrust::transform( + rmm::exec_policy_nosync(loop_stream), + output_count_offsets.begin() + (output_count_offsets.size() - 1), + output_count_offsets.end(), + counters.data() + j, + typecast_t{}); + } else { + thrust::fill(rmm::exec_policy_nosync(loop_stream), + counters.data() + j, + counters.data() + (j + 1), + size_t{0}); + } + } + + (*edge_partition_hypersparse_key_offset_vectors).push_back(std::move(offsets)); + } + } + } + if (edge_partition_new_key_buffers) { // if there is no bitmap buffer + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = + loop_stream_pool_indices + ? 
handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + auto const& key_segment_offsets = (*key_segment_offset_vectors)[partition_idx]; + + auto& keys = edge_partition_key_buffers[j]; + std::variant, rmm::device_uvector> offsets = + rmm::device_uvector(0, loop_stream); + if (uint32_key_output_offset) { + std::get<0>(offsets).resize(process_local_edges[j] + ? (key_segment_offsets[4] - key_segment_offsets[3]) + : vertex_t{0}, + loop_stream); + } else { + offsets = rmm::device_uvector( + process_local_edges[j] ? (key_segment_offsets[4] - key_segment_offsets[3]) + : vertex_t{0}, + loop_stream); + } + + if (process_local_edges[j]) { + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(partition_idx)); + auto const& segment_offsets = + graph_view.local_edge_partition_segment_offsets(partition_idx); + + auto segment_bitmap = *(edge_partition.dcs_nzd_range_bitmap()); + + auto& new_keys = (*edge_partition_new_key_buffers)[j]; + if constexpr (try_bitmap) { + assert(!v_list_bitmap); + if (keys.index() == 0) { + auto flag_first = thrust::make_transform_iterator( + get_dataframe_buffer_begin(std::get<0>(keys)) + key_segment_offsets[3], + cuda::proclaim_return_type( + [segment_bitmap = raft::device_span(segment_bitmap.data(), + segment_bitmap.size()), + range_first = local_v_list_range_firsts[partition_idx], + major_hypersparse_first = + *(edge_partition + .major_hypersparse_first())] __device__(uint32_t v_offset) { + auto v = range_first + static_cast(v_offset); + auto segment_offset = v - major_hypersparse_first; + return ((segment_bitmap[packed_bool_offset(segment_offset)] & + packed_bool_mask(segment_offset)) != packed_bool_empty_mask()); + })); + if (offsets.index() == 0) { + auto input_pair_first = + thrust::make_zip_iterator(get_dataframe_buffer_begin(std::get<0>(keys)), + thrust::make_counting_iterator(uint32_t{0})) + + key_segment_offsets[3]; + detail::copy_if_nosync( + input_pair_first, + 
input_pair_first + (key_segment_offsets[4] - key_segment_offsets[3]), + flag_first, + thrust::make_zip_iterator( + get_dataframe_buffer_begin(std::get<0>(new_keys)) + + key_segment_offsets[3], + std::get<0>(offsets).begin()), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + } else { + auto input_pair_first = + thrust::make_zip_iterator(get_dataframe_buffer_begin(std::get<0>(keys)), + thrust::make_counting_iterator(size_t{0})) + + key_segment_offsets[3]; + detail::copy_if_nosync( + input_pair_first, + input_pair_first + (key_segment_offsets[4] - key_segment_offsets[3]), + flag_first, + thrust::make_zip_iterator( + get_dataframe_buffer_begin(std::get<0>(new_keys)) + + key_segment_offsets[3], + std::get<1>(offsets).begin()), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + } + } else { + auto flag_first = thrust::make_transform_iterator( + get_dataframe_buffer_begin(std::get<1>(keys)) + key_segment_offsets[3], + cuda::proclaim_return_type( + [segment_bitmap = raft::device_span(segment_bitmap.data(), + segment_bitmap.size()), + major_hypersparse_first = + *(edge_partition.major_hypersparse_first())] __device__(vertex_t v) { + auto segment_offset = v - major_hypersparse_first; + return ((segment_bitmap[packed_bool_offset(segment_offset)] & + packed_bool_mask(segment_offset)) != packed_bool_empty_mask()); + })); + if (offsets.index() == 0) { + auto input_pair_first = + thrust::make_zip_iterator(get_dataframe_buffer_begin(std::get<1>(keys)), + thrust::make_counting_iterator(uint32_t{0})) + + key_segment_offsets[3]; + detail::copy_if_nosync( + input_pair_first, + input_pair_first + (key_segment_offsets[4] - key_segment_offsets[3]), + flag_first, + thrust::make_zip_iterator( + get_dataframe_buffer_begin(std::get<1>(new_keys)) + + key_segment_offsets[3], + std::get<0>(offsets).begin()), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + } else { + auto input_pair_first = + 
thrust::make_zip_iterator(get_dataframe_buffer_begin(std::get<1>(keys)), + thrust::make_counting_iterator(size_t{0})) + + key_segment_offsets[3]; + detail::copy_if_nosync( + input_pair_first, + input_pair_first + (key_segment_offsets[4] - key_segment_offsets[3]), + flag_first, + thrust::make_zip_iterator( + get_dataframe_buffer_begin(std::get<1>(new_keys)) + + key_segment_offsets[3], + std::get<1>(offsets).begin()), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + } + } + } else { + auto flag_first = thrust::make_transform_iterator( + get_dataframe_buffer_begin(keys) + key_segment_offsets[3], + cuda::proclaim_return_type( + [segment_bitmap = raft::device_span(segment_bitmap.data(), + segment_bitmap.size()), + major_hypersparse_first = + *(edge_partition.major_hypersparse_first())] __device__(auto key) { + auto segment_offset = + thrust_tuple_get_or_identity(key) - major_hypersparse_first; + return ((segment_bitmap[packed_bool_offset(segment_offset)] & + packed_bool_mask(segment_offset)) != packed_bool_empty_mask()); + })); + if (offsets.index() == 0) { + auto input_pair_first = + thrust::make_zip_iterator(get_dataframe_buffer_begin(keys), + thrust::make_counting_iterator(uint32_t{0})) + + key_segment_offsets[3]; + detail::copy_if_nosync( + input_pair_first, + input_pair_first + (key_segment_offsets[4] - key_segment_offsets[3]), + flag_first, + thrust::make_zip_iterator( + get_dataframe_buffer_begin(new_keys) + key_segment_offsets[3], + std::get<0>(offsets).begin()), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + } else { + auto input_pair_first = + thrust::make_zip_iterator(get_dataframe_buffer_begin(keys), + thrust::make_counting_iterator(size_t{0})) + + key_segment_offsets[3]; + detail::copy_if_nosync( + input_pair_first, + input_pair_first + (key_segment_offsets[4] - key_segment_offsets[3]), + flag_first, + thrust::make_zip_iterator( + get_dataframe_buffer_begin(new_keys) + key_segment_offsets[3], + 
std::get<1>(offsets).begin()), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + } + } + } + + (*edge_partition_hypersparse_key_offset_vectors).push_back(std::move(offsets)); + } + } + if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } + if (edge_partition_new_key_buffers) { + for (size_t j = 0; j < loop_count; ++j) { + edge_partition_key_buffers[j] = std::move((*edge_partition_new_key_buffers)[j]); + } + } + if (edge_partition_bitmap_buffers) { (*edge_partition_bitmap_buffers).clear(); } + + std::vector h_counts(loop_count); + raft::update_host(h_counts.data(), counters.data(), loop_count, handle.get_stream()); + handle.sync_stream(); + + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = + loop_stream_pool_indices + ? handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + if (process_local_edges[j]) { + auto& key_segment_offsets = (*key_segment_offset_vectors)[partition_idx]; + + auto& keys = edge_partition_key_buffers[j]; + if constexpr (try_bitmap) { + if (keys.index() == 0) { + resize_dataframe_buffer( + std::get<0>(keys), key_segment_offsets[3] + h_counts[j], loop_stream); + } else { + resize_dataframe_buffer( + std::get<1>(keys), key_segment_offsets[3] + h_counts[j], loop_stream); + } + } else { + resize_dataframe_buffer(keys, key_segment_offsets[3] + h_counts[j], loop_stream); + } + // skip shrink_to_fit to cut execution time + + auto& offsets = (*edge_partition_hypersparse_key_offset_vectors)[j]; + if (offsets.index() == 0) { + std::get<0>(offsets).resize(h_counts[j], loop_stream); + } else { + std::get<1>(offsets).resize(h_counts[j], loop_stream); + } + // skip shrink_to_fit to cut execution time + } + } + + { // update edge_partition_deg1_hypersparse_key_offset_counts + if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } + + std::vector h_ptrs( + loop_count); // pointers to 
hypersparse key offset vectors + std::vector h_scalars( + loop_count * 2); // (key offset vector sizes, start degree 1 key offset) + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + if (process_local_edges[j]) { + auto const& offsets = (*edge_partition_hypersparse_key_offset_vectors)[j]; + if (offsets.index() == 0) { + h_ptrs[j] = static_cast(std::get<0>(offsets).data()); + h_scalars[j * 2] = std::get<0>(offsets).size(); + } else { + h_ptrs[j] = static_cast(std::get<1>(offsets).data()); + h_scalars[j * 2] = std::get<1>(offsets).size(); + } + h_scalars[j * 2 + 1] = + local_key_list_sizes[partition_idx] - (*local_key_list_deg1_sizes)[partition_idx]; + } else { + h_ptrs[j] = static_cast(nullptr); + h_scalars[j * 2] = size_t{0}; + h_scalars[j * 2 + 1] = size_t{0}; + } + } + rmm::device_uvector d_ptrs(h_ptrs.size(), handle.get_stream()); + rmm::device_uvector d_scalars(h_scalars.size(), handle.get_stream()); + raft::update_device(d_ptrs.data(), h_ptrs.data(), h_ptrs.size(), handle.get_stream()); + raft::update_device( + d_scalars.data(), h_scalars.data(), h_scalars.size(), handle.get_stream()); + rmm::device_uvector d_counts(loop_count, handle.get_stream()); + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(loop_count), + d_counts.begin(), + cuda::proclaim_return_type( + [d_ptrs = raft::device_span(d_ptrs.data(), d_ptrs.size()), + d_scalars = raft::device_span(d_scalars.data(), d_scalars.size()), + uint32_key_output_offset] __device__(auto i) { + auto first = d_ptrs[i]; + if (first != static_cast(nullptr)) { + auto size = d_scalars[i * 2]; + auto start_offset = d_scalars[i * 2 + 1]; + if (uint32_key_output_offset) { + auto casted_first = static_cast(first); + return size - static_cast(thrust::distance( + casted_first, + thrust::lower_bound(thrust::seq, + casted_first, + casted_first + size, + static_cast(start_offset)))); + } else { + auto casted_first = 
static_cast(first); + return size - + static_cast(thrust::distance( + casted_first, + thrust::lower_bound( + thrust::seq, casted_first, casted_first + size, start_offset))); + } + } else { + return size_t{0}; + } + })); + raft::update_host((*edge_partition_deg1_hypersparse_key_offset_counts).data(), + d_counts.data(), + d_counts.size(), + handle.get_stream()); + handle.sync_stream(); + } + } + } + } + + std::conditional_t>, + std::byte /* dummy */> + edge_partition_major_output_buffers{}; + if constexpr (GraphViewType::is_multi_gpu && update_major) { + edge_partition_major_output_buffers.reserve(loop_count); + } + + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = loop_stream_pool_indices + ? handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + if constexpr (GraphViewType::is_multi_gpu && update_major) { + size_t buffer_size{0}; + if (process_local_edges[j]) { + if constexpr (use_input_key) { + auto const& keys = edge_partition_key_buffers[j]; + if constexpr (try_bitmap) { + if (keys.index() == 0) { + buffer_size = size_dataframe_buffer(std::get<0>(keys)); + } else { + buffer_size = size_dataframe_buffer(std::get<1>(keys)); + } + } else { + buffer_size = size_dataframe_buffer(keys); + } + } else { + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(partition_idx)); + auto const& segment_offsets = + graph_view.local_edge_partition_segment_offsets(partition_idx); + + buffer_size = + segment_offsets + ? 
*((*segment_offsets).rbegin() + 1) /* exclude the zero degree segment */ + : edge_partition.major_range_size(); + } + } + edge_partition_major_output_buffers.push_back( + allocate_dataframe_buffer(buffer_size, loop_stream)); + } + } + if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } + + for (size_t j = 0; j < loop_count; ++j) { + if (process_local_edges[j]) { + auto partition_idx = i + j; + + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(partition_idx)); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, partition_idx) + : thrust::nullopt; + size_t num_streams_per_loop{1}; + if (stream_pool_indices) { + assert((*stream_pool_indices).size() >= num_concurrent_loops); + num_streams_per_loop = (*stream_pool_indices).size() / num_concurrent_loops; + } + auto edge_partition_stream_pool_indices = + stream_pool_indices + ? std::make_optional>( + (*stream_pool_indices).data() + j * num_streams_per_loop, num_streams_per_loop) + : std::nullopt; + + T major_init{}; + T major_identity_element{}; + if constexpr (update_major) { + if constexpr (std::is_same_v>) { // if any edge has a non-init value, + // one of the non-init values will + // be selected. + major_init = init; + major_identity_element = init; + } else { + major_init = ReduceOp::identity_element; + if constexpr (GraphViewType::is_multi_gpu) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + major_init = (static_cast(partition_idx) == minor_comm_rank) + ? 
init + : ReduceOp::identity_element; + } else { + major_init = init; + } + major_identity_element = ReduceOp::identity_element; + } + } + + std::optional> key_segment_offsets{std::nullopt}; + if constexpr (use_input_key) { + if (key_segment_offset_vectors) { + key_segment_offsets = (*key_segment_offset_vectors)[partition_idx]; + if constexpr (filter_input_key) { + if (edge_partition_hypersparse_key_offset_vectors) { + (*key_segment_offsets).back() = + size_dataframe_buffer(edge_partition_major_output_buffers[j]); + *((*key_segment_offsets).rbegin() + 1) = (*key_segment_offsets).back(); + } + } + } + } else { + auto const& segment_offsets = + graph_view.local_edge_partition_segment_offsets(partition_idx); + if (segment_offsets) { + key_segment_offsets = std::vector((*segment_offsets).size()); + std::transform((*segment_offsets).begin(), + (*segment_offsets).end(), + (*key_segment_offsets).begin(), + [](vertex_t offset) { return static_cast(offset); }); + } + } + + edge_partition_src_input_device_view_t edge_partition_src_value_input{}; + edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; + if constexpr (GraphViewType::is_storage_transposed) { + edge_partition_src_value_input = + edge_partition_src_input_device_view_t(edge_src_value_input); + edge_partition_dst_value_input = + edge_partition_dst_input_device_view_t(edge_dst_value_input, partition_idx); + } else { + edge_partition_src_value_input = + edge_partition_src_input_device_view_t(edge_src_value_input, partition_idx); + edge_partition_dst_value_input = + edge_partition_dst_input_device_view_t(edge_dst_value_input); + } + auto edge_partition_e_value_input = + edge_partition_e_input_device_view_t(edge_value_input, partition_idx); + + std::conditional_t, + edge_partition_minor_output_device_view_t>, + VertexValueOutputIterator> + output_buffer{}; + if constexpr (GraphViewType::is_multi_gpu) { + if constexpr (update_major) { + output_buffer = 
get_dataframe_buffer_begin(edge_partition_major_output_buffers[j]); + } else { + output_buffer = + edge_partition_minor_output_device_view_t(minor_tmp_buffer->mutable_view()); + } + } else { + output_buffer = tmp_vertex_value_output_first; + } + + bool processed{false}; + if constexpr (try_bitmap) { + auto const& keys = edge_partition_key_buffers[j]; + if (keys.index() == 0) { + auto edge_partition_key_first = thrust::make_transform_iterator( + std::get<0>(keys).begin(), + cuda::proclaim_return_type( + [range_first = local_v_list_range_firsts[partition_idx]] __device__( + uint32_t v_offset) { return range_first + static_cast(v_offset); })); + per_v_transform_reduce_e_edge_partition( + handle, + edge_partition, + edge_partition_key_first, + edge_partition_key_first + std::get<0>(keys).size(), + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + output_buffer, + e_op, + major_init, + major_identity_element, + reduce_op, + pred_op, + key_segment_offsets ? 
std::make_optional>( + (*key_segment_offsets).data(), (*key_segment_offsets).size()) + : std::nullopt, + edge_partition_stream_pool_indices); + processed = true; + } + } + if (!processed) { + auto edge_partition_key_first = sorted_unique_key_first; + auto edge_partition_key_last = sorted_unique_nzd_key_last; + if constexpr (GraphViewType::is_multi_gpu && use_input_key) { + auto const& keys = edge_partition_key_buffers[j]; + if constexpr (try_bitmap) { + edge_partition_key_first = get_dataframe_buffer_begin(std::get<1>(keys)); + edge_partition_key_last = get_dataframe_buffer_end(std::get<1>(keys)); + } else { + edge_partition_key_first = get_dataframe_buffer_begin(keys); + edge_partition_key_last = get_dataframe_buffer_end(keys); + } + } + + per_v_transform_reduce_e_edge_partition( + handle, + edge_partition, + edge_partition_key_first, + edge_partition_key_last, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_e_mask, + output_buffer, + e_op, + major_init, + major_identity_element, + reduce_op, + pred_op, + key_segment_offsets ? 
std::make_optional>( + (*key_segment_offsets).data(), (*key_segment_offsets).size()) + : std::nullopt, + edge_partition_stream_pool_indices); + } + } + } + if (stream_pool_indices) { handle.sync_stream_pool(*stream_pool_indices); } + + if constexpr (GraphViewType::is_multi_gpu && update_major) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + auto const minor_comm_size = minor_comm.get_size(); + + if constexpr (use_input_key) { + edge_partition_key_buffers.clear(); + edge_partition_key_buffers.shrink_to_fit(); + } + + if constexpr (std::is_same_v>) { + std::conditional_t< + filter_input_key, + std::optional, raft::device_span>>>, + std::byte /* dummy */> + edge_partition_hypersparse_non_deg1_key_offset_spans{}; + if constexpr (filter_input_key) { + if (edge_partition_hypersparse_key_offset_vectors) { + edge_partition_hypersparse_non_deg1_key_offset_spans = std::vector< + std::variant, raft::device_span>>( + loop_count); + } + } + + std::vector edge_partition_allreduce_sizes(loop_count); + std::vector edge_partition_allreduce_displacements(loop_count); + std::vector edge_partition_contiguous_sizes(loop_count); + + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto const& output_buffer = edge_partition_major_output_buffers[j]; + + size_t allreduce_size{}; + size_t contiguous_size{}; + if constexpr (filter_input_key) { + allreduce_size = local_key_list_sizes[partition_idx]; + if (local_key_list_deg1_sizes) { + allreduce_size -= (*local_key_list_deg1_sizes)[partition_idx]; + } + if (key_segment_offset_vectors) { + auto const& key_segment_offsets = (*key_segment_offset_vectors)[partition_idx]; + contiguous_size = key_segment_offsets[3]; + } else { + contiguous_size = local_key_list_sizes[partition_idx]; + } + } else { + static_assert(!use_input_key); + auto hypersparse_degree_offsets = + 
graph_view.local_edge_partition_hypersparse_degree_offsets(partition_idx); + allreduce_size = size_dataframe_buffer(output_buffer); + if (hypersparse_degree_offsets) { + allreduce_size -= *((*hypersparse_degree_offsets).rbegin()) - + *((*hypersparse_degree_offsets).rbegin() + 1); + } + contiguous_size = size_dataframe_buffer(output_buffer); + } + edge_partition_allreduce_sizes[j] = allreduce_size; + edge_partition_contiguous_sizes[j] = contiguous_size; + } + std::exclusive_scan(edge_partition_allreduce_sizes.begin(), + edge_partition_allreduce_sizes.end(), + edge_partition_allreduce_displacements.begin(), + size_t{0}); + std::variant, rmm::device_uvector> + aggregate_priorities = rmm::device_uvector(0, handle.get_stream()); + if (minor_comm_size <= std::numeric_limits::max()) { // priority == uint8_t + std::get<0>(aggregate_priorities) + .resize( + edge_partition_allreduce_displacements.back() + edge_partition_allreduce_sizes.back(), + handle.get_stream()); + } else { // priority == uint32_t + aggregate_priorities = rmm::device_uvector( + edge_partition_allreduce_displacements.back() + edge_partition_allreduce_sizes.back(), + handle.get_stream()); + } + if (loop_stream_pool_indices) { handle.sync_stream(); } + + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = loop_stream_pool_indices + ? handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + std::optional< + std::variant, raft::device_span>> + hypersparse_non_deg1_key_offsets{std::nullopt}; + if constexpr (filter_input_key) { + if (edge_partition_hypersparse_key_offset_vectors) { + auto const& offsets = (*edge_partition_hypersparse_key_offset_vectors)[j]; + + if (offsets.index() == 0) { + hypersparse_non_deg1_key_offsets = raft::device_span( + std::get<0>(offsets).data(), + std::get<0>(offsets).size() - + (edge_partition_deg1_hypersparse_key_offset_counts + ? 
(*edge_partition_deg1_hypersparse_key_offset_counts)[j] + : size_t{0})); + } else { + hypersparse_non_deg1_key_offsets = raft::device_span( + std::get<1>(offsets).data(), + std::get<1>(offsets).size() - + (edge_partition_deg1_hypersparse_key_offset_counts + ? (*edge_partition_deg1_hypersparse_key_offset_counts)[j] + : size_t{0})); + } + (*edge_partition_hypersparse_non_deg1_key_offset_spans)[j] = + *hypersparse_non_deg1_key_offsets; + } + } + + auto const& output_buffer = edge_partition_major_output_buffers[j]; + + if (minor_comm_size <= std::numeric_limits::max()) { // priority == uint8_t + compute_priorities( + minor_comm, + get_dataframe_buffer_begin(output_buffer), + raft::device_span(std::get<0>(aggregate_priorities).data() + + edge_partition_allreduce_displacements[j], + edge_partition_allreduce_sizes[j]), + hypersparse_non_deg1_key_offsets, + edge_partition_contiguous_sizes[j], + static_cast(partition_idx), + subgroup_size, + init, + process_local_edges[j] ? false : true /* ignore_local_values */, + loop_stream); + } else { // priority == uint32_t + compute_priorities( + minor_comm, + get_dataframe_buffer_begin(output_buffer), + raft::device_span(std::get<1>(aggregate_priorities).data() + + edge_partition_allreduce_displacements[j], + edge_partition_allreduce_sizes[j]), + hypersparse_non_deg1_key_offsets, + edge_partition_contiguous_sizes[j], + static_cast(partition_idx), + subgroup_size, + init, + process_local_edges[j] ? 
false : true /* ignore_local_values */, + loop_stream); + } + } + if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } + + if (minor_comm_size <= std::numeric_limits::max()) { // priority == uint8_t + device_allreduce(minor_comm, + std::get<0>(aggregate_priorities).data(), + std::get<0>(aggregate_priorities).data(), + std::get<0>(aggregate_priorities).size(), + raft::comms::op_t::MIN, + handle.get_stream()); + } else { // priority == uint32_t + device_allreduce(minor_comm, + std::get<1>(aggregate_priorities).data(), + std::get<1>(aggregate_priorities).data(), + std::get<1>(aggregate_priorities).size(), + raft::comms::op_t::MIN, + handle.get_stream()); + } + if (loop_stream_pool_indices) { handle.sync_stream(); } + + std::vector< + std::variant, rmm::device_uvector>, + std::optional>>> + edge_partition_selected_ranks_or_flags{}; + edge_partition_selected_ranks_or_flags.reserve(loop_count); + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = loop_stream_pool_indices + ? 
handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + auto const& output_buffer = edge_partition_major_output_buffers[j]; + std::optional< + std::variant, raft::device_span>> + hypersparse_non_deg1_key_offsets{std::nullopt}; + if constexpr (filter_input_key) { + if (edge_partition_hypersparse_key_offset_vectors) { + hypersparse_non_deg1_key_offsets = + (*edge_partition_hypersparse_non_deg1_key_offset_spans)[j]; + } + } + + auto contiguous_size = edge_partition_contiguous_sizes[j]; + + std::variant, rmm::device_uvector>, + std::optional>> + selected_ranks_or_flags = + std::variant, rmm::device_uvector>( + rmm::device_uvector(0, loop_stream)); + if (minor_comm_size <= std::numeric_limits::max()) { // priority == uint8_t + auto priorities = raft::device_span( + std::get<0>(aggregate_priorities).data() + edge_partition_allreduce_displacements[j], + edge_partition_allreduce_sizes[j]); + auto tmp = compute_selected_ranks_from_priorities( + minor_comm, + priorities, + hypersparse_non_deg1_key_offsets, + contiguous_size, + static_cast(partition_idx), + subgroup_size, + process_local_edges[j] ? false : true /* ignore_local_values */, + loop_stream); + if (tmp.index() == 0) { + selected_ranks_or_flags = + std::variant, rmm::device_uvector>( + std::move(std::get<0>(tmp))); + } else { + selected_ranks_or_flags = std::move(std::get<1>(tmp)); + } + } else { // priority_t == uint32_t + auto priorities = raft::device_span( + std::get<1>(aggregate_priorities).data() + edge_partition_allreduce_displacements[j], + edge_partition_allreduce_sizes[j]); + auto tmp = compute_selected_ranks_from_priorities( + minor_comm, + priorities, + hypersparse_non_deg1_key_offsets, + contiguous_size, + static_cast(partition_idx), + subgroup_size, + process_local_edges[j] ? 
false : true /* ignore_local_values */, + loop_stream); + if (tmp.index() == 0) { + selected_ranks_or_flags = + std::variant, rmm::device_uvector>( + std::move(std::get<0>(tmp))); + } else { + selected_ranks_or_flags = std::move(std::get<1>(tmp)); + } + } + edge_partition_selected_ranks_or_flags.push_back(std::move(selected_ranks_or_flags)); + } + if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } + if (minor_comm_size <= std::numeric_limits::max()) { // priority == uint8_t + std::get<0>(aggregate_priorities).resize(0, handle.get_stream()); + std::get<0>(aggregate_priorities).shrink_to_fit(handle.get_stream()); + } else { + std::get<1>(aggregate_priorities).resize(0, handle.get_stream()); + std::get<1>(aggregate_priorities).shrink_to_fit(handle.get_stream()); + } + if (loop_stream_pool_indices) { handle.sync_stream(); } + + std::vector> edge_partition_values{}; + edge_partition_values.reserve(loop_count); + + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = loop_stream_pool_indices + ? handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + auto& output_buffer = edge_partition_major_output_buffers[j]; + + auto values = allocate_dataframe_buffer( + process_local_edges[j] ? 
size_dataframe_buffer(output_buffer) : size_t{0}, loop_stream); + if (process_local_edges[j]) { + if (minor_comm_rank == static_cast(partition_idx)) { + assert(!use_input_key); + assert(edge_partition_selected_ranks_or_flags[j].index() == 0); + auto const& selected_ranks = std::get<0>(edge_partition_selected_ranks_or_flags[j]); + if (selected_ranks.index() == 0) { + copy_if_nosync( + get_dataframe_buffer_begin(output_buffer), + get_dataframe_buffer_begin(output_buffer) + edge_partition_allreduce_sizes[j], + thrust::make_transform_iterator( + std::get<0>(selected_ranks).begin(), + cuda::proclaim_return_type([minor_comm_rank] __device__(auto rank) { + return static_cast(rank) == minor_comm_rank; + })), + get_dataframe_buffer_begin(values), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + } else { + copy_if_nosync( + get_dataframe_buffer_begin(output_buffer), + get_dataframe_buffer_begin(output_buffer) + edge_partition_allreduce_sizes[j], + thrust::make_transform_iterator( + std::get<1>(selected_ranks).begin(), + cuda::proclaim_return_type( + [minor_comm_rank] __device__(auto rank) { return rank == minor_comm_rank; })), + get_dataframe_buffer_begin(values), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + } + } else { + assert(edge_partition_selected_ranks_or_flags[j].index() == 1); + auto& keep_flags = std::get<1>(edge_partition_selected_ranks_or_flags[j]); + size_t input_end_offset{}; + if constexpr (filter_input_key) { + input_end_offset = edge_partition_contiguous_sizes[j]; + if (edge_partition_hypersparse_non_deg1_key_offset_spans) { + auto const& span = (*edge_partition_hypersparse_non_deg1_key_offset_spans)[j]; + if (span.index() == 0) { + input_end_offset += std::get<0>(span).size(); + } else { + input_end_offset += std::get<1>(span).size(); + } + } + } else { + input_end_offset = edge_partition_allreduce_sizes[j]; + } + copy_if_nosync( + get_dataframe_buffer_begin(output_buffer), + 
get_dataframe_buffer_begin(output_buffer) + input_end_offset, + thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type( + [keep_flags = raft::device_span( + (*keep_flags).data(), (*keep_flags).size())] __device__(size_t offset) { + auto word = keep_flags[packed_bool_offset(offset)]; + return ((word & packed_bool_mask(offset)) != packed_bool_empty_mask()); + })), + get_dataframe_buffer_begin(values), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + (*keep_flags).resize(0, loop_stream); + (*keep_flags).shrink_to_fit(loop_stream); + } + } + + edge_partition_values.push_back(std::move(values)); + } + if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } + + std::vector copy_sizes(loop_count); + raft::update_host(copy_sizes.data(), counters.data(), loop_count, handle.get_stream()); + handle.sync_stream(); + + std::optional< + std::vector, rmm::device_uvector>>> + edge_partition_deg1_hypersparse_output_offset_vectors{}; + + if (graph_view.use_dcs()) { + edge_partition_deg1_hypersparse_output_offset_vectors = + std::vector, rmm::device_uvector>>{}; + (*edge_partition_deg1_hypersparse_output_offset_vectors).reserve(loop_count); + + for (size_t j = 0; j < loop_count; ++j) { + auto loop_stream = + loop_stream_pool_indices + ? 
handle.get_stream_from_stream_pool((*loop_stream_pool_indices)[j]) + : handle.get_stream(); + + auto& output_buffer = edge_partition_major_output_buffers[j]; + std::variant, rmm::device_uvector> + output_offsets = rmm::device_uvector(0, loop_stream); + if (!uint32_key_output_offset) { + output_offsets = rmm::device_uvector(0, loop_stream); + } + + if (process_local_edges[j]) { + auto& values = edge_partition_values[j]; + + size_t output_offset_buf_size{0}; + if constexpr (filter_input_key) { + output_offset_buf_size = (*edge_partition_deg1_hypersparse_key_offset_counts)[j]; + } else { + assert(!use_input_key); + output_offset_buf_size = + size_dataframe_buffer(output_buffer) - edge_partition_allreduce_sizes[j]; + } + + if (output_offsets.index() == 0) { + std::get<0>(output_offsets).resize(output_offset_buf_size, loop_stream); + } else { + output_offsets = rmm::device_uvector(output_offset_buf_size, loop_stream); + } + + size_t input_start_offset{}; + if constexpr (filter_input_key) { + auto span = (*edge_partition_hypersparse_non_deg1_key_offset_spans)[j]; + input_start_offset = + edge_partition_contiguous_sizes[j] + + (span.index() == 0 ? 
std::get<0>(span).size() : std::get<1>(span).size()); + } else { + static_assert(!use_input_key); + input_start_offset = edge_partition_allreduce_sizes[j]; + } + auto flag_first = thrust::make_transform_iterator( + get_dataframe_buffer_begin(output_buffer) + input_start_offset, + cuda::proclaim_return_type( + [init] __device__(auto val) { return val != init; })); + + if constexpr (filter_input_key) { + auto& hypersparse_key_offsets = (*edge_partition_hypersparse_key_offset_vectors)[j]; + auto span = (*edge_partition_hypersparse_non_deg1_key_offset_spans)[j]; + if (hypersparse_key_offsets.index() == 0) { + assert(output_offsets.index() == 0); + auto input_pair_first = thrust::make_zip_iterator( + get_dataframe_buffer_begin(output_buffer) + input_start_offset, + std::get<0>(hypersparse_key_offsets).begin() + std::get<0>(span).size()); + copy_if_nosync( + input_pair_first, + input_pair_first + (*edge_partition_deg1_hypersparse_key_offset_counts)[j], + flag_first, + thrust::make_zip_iterator(get_dataframe_buffer_begin(values) + copy_sizes[j], + std::get<0>(output_offsets).begin()), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + std::get<0>(hypersparse_key_offsets).resize(0, loop_stream); + std::get<0>(hypersparse_key_offsets).shrink_to_fit(loop_stream); + } else { + assert(output_offsets.index() == 1); + auto input_pair_first = thrust::make_zip_iterator( + get_dataframe_buffer_begin(output_buffer) + input_start_offset, + std::get<1>(hypersparse_key_offsets).begin() + std::get<1>(span).size()); + copy_if_nosync( + input_pair_first, + input_pair_first + (*edge_partition_deg1_hypersparse_key_offset_counts)[j], + flag_first, + thrust::make_zip_iterator(get_dataframe_buffer_begin(values) + copy_sizes[j], + std::get<1>(output_offsets).begin()), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + std::get<1>(hypersparse_key_offsets).resize(0, loop_stream); + std::get<1>(hypersparse_key_offsets).shrink_to_fit(loop_stream); + } + } 
else { + static_assert(!use_input_key); + assert(process_local_edges[j]); + if (output_offsets.index() == 0) { + auto input_pair_first = + thrust::make_zip_iterator(get_dataframe_buffer_begin(output_buffer), + thrust::make_counting_iterator(uint32_t{0})); + copy_if_nosync( + input_pair_first + input_start_offset, + input_pair_first + size_dataframe_buffer(output_buffer), + flag_first, + thrust::make_zip_iterator(get_dataframe_buffer_begin(values) + copy_sizes[j], + std::get<0>(output_offsets).begin()), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + } else { + auto input_pair_first = + thrust::make_zip_iterator(get_dataframe_buffer_begin(output_buffer), + thrust::make_counting_iterator(size_t{0})); + copy_if_nosync( + input_pair_first + input_start_offset, + input_pair_first + size_dataframe_buffer(output_buffer), + flag_first, + thrust::make_zip_iterator(get_dataframe_buffer_begin(values) + copy_sizes[j], + std::get<1>(output_offsets).begin()), + raft::device_span(counters.data() + j, size_t{1}), + loop_stream); + } + } + } + + (*edge_partition_deg1_hypersparse_output_offset_vectors) + .push_back(std::move(output_offsets)); + + resize_dataframe_buffer(output_buffer, 0, loop_stream); + shrink_to_fit_dataframe_buffer(output_buffer, loop_stream); + } + if (loop_stream_pool_indices) { handle.sync_stream_pool(*loop_stream_pool_indices); } + + std::vector deg1_copy_sizes(loop_count); + raft::update_host( + deg1_copy_sizes.data(), counters.data(), loop_count, handle.get_stream()); + handle.sync_stream(); + + for (size_t j = 0; j < loop_count; ++j) { + if (process_local_edges[j]) { + copy_sizes[j] += deg1_copy_sizes[j]; + auto& offsets = (*edge_partition_deg1_hypersparse_output_offset_vectors)[j]; + if (offsets.index() == 0) { + std::get<0>(offsets).resize(deg1_copy_sizes[j], handle.get_stream()); + } else { + assert(offsets.index() == 1); + std::get<1>(offsets).resize(deg1_copy_sizes[j], handle.get_stream()); + } + // skip shrink_to_fit() to cut 
execution time + } + } + } + + for (size_t j = 0; j < loop_count; ++j) { + if (process_local_edges[j]) { + resize_dataframe_buffer(edge_partition_values[j], copy_sizes[j], handle.get_stream()); + // skip shrink_to_fit() to cut execution time + } + } + + size_t min_element_size{cache_line_size}; + if constexpr (std::is_arithmetic_v) { + min_element_size = std::min(sizeof(T), min_element_size); + } else { + static_assert(is_thrust_tuple_of_arithmetic::value); + min_element_size = + std::min(cugraph::min_thrust_tuple_element_sizes(), min_element_size); + } + assert((cache_line_size % min_element_size) == 0); + size_t value_alignment = cache_line_size / min_element_size; + + size_t offset_alignment = 1; + if (graph_view.use_dcs()) { + static_assert(((cache_line_size % sizeof(uint32_t)) == 0) && + ((cache_line_size % sizeof(size_t)) == 0)); + offset_alignment = + cache_line_size / (uint32_key_output_offset ? sizeof(uint32_t) : sizeof(size_t)); + } + + std::optional> rx_value_sizes{}; + std::optional> rx_value_displs{}; + std::optional> rx_values{}; + + std::optional> rx_offset_sizes{}; + std::optional> rx_offset_displs{}; + std::optional, rmm::device_uvector>> + rx_offsets{}; + { + auto size_per_rank = + loop_count * (graph_view.use_dcs() ? 
2 /* value buffer size, offset buffer size */ + : 1 /* value buffer size */); + rmm::device_uvector d_aggregate_buffer_sizes(minor_comm_size * size_per_rank, + handle.get_stream()); + std::vector h_buffer_sizes(size_per_rank); + for (size_t j = 0; j < loop_count; ++j) { + h_buffer_sizes[j] = size_dataframe_buffer(edge_partition_values[j]); + if (graph_view.use_dcs()) { + auto const& offsets = (*edge_partition_deg1_hypersparse_output_offset_vectors)[j]; + if (offsets.index() == 0) { + h_buffer_sizes[loop_count + j] = std::get<0>(offsets).size(); + } else { + assert(offsets.index() == 1); + h_buffer_sizes[loop_count + j] = std::get<1>(offsets).size(); + } + } + } + raft::update_device(d_aggregate_buffer_sizes.data() + minor_comm_rank * size_per_rank, + h_buffer_sizes.data(), + h_buffer_sizes.size(), + handle.get_stream()); + device_allgather(minor_comm, + d_aggregate_buffer_sizes.data() + minor_comm_rank * size_per_rank, + d_aggregate_buffer_sizes.data(), + size_per_rank, + handle.get_stream()); + if (static_cast(minor_comm_rank / num_concurrent_loops) == + (i / num_concurrent_loops)) { + std::vector h_aggregate_buffer_sizes(d_aggregate_buffer_sizes.size()); + raft::update_host(h_aggregate_buffer_sizes.data(), + d_aggregate_buffer_sizes.data(), + d_aggregate_buffer_sizes.size(), + handle.get_stream()); + handle.sync_stream(); + auto j = static_cast(minor_comm_rank % num_concurrent_loops); + rx_value_sizes = std::vector(minor_comm_size); + rx_value_displs = std::vector(minor_comm_size); + if (graph_view.use_dcs()) { + rx_offset_sizes = std::vector(minor_comm_size); + rx_offset_displs = std::vector(minor_comm_size); + } + for (int k = 0; k < minor_comm_size; ++k) { + (*rx_value_sizes)[k] = h_aggregate_buffer_sizes[k * size_per_rank + j]; + if (graph_view.use_dcs()) { + (*rx_offset_sizes)[k] = + h_aggregate_buffer_sizes[k * size_per_rank + loop_count + j]; + } + } + + std::vector aligned_sizes(minor_comm_size); + for (int k = 0; k < minor_comm_size; ++k) { + if (k == 
(minor_comm_size - 1)) { + aligned_sizes[k] = (*rx_value_sizes)[k]; + } else { + aligned_sizes[k] = raft::round_up_safe((*rx_value_sizes)[k], value_alignment); + } + } + std::exclusive_scan( + aligned_sizes.begin(), aligned_sizes.end(), (*rx_value_displs).begin(), size_t{0}); + + if (graph_view.use_dcs()) { + for (int k = 0; k < minor_comm_size; ++k) { + if (k == (minor_comm_size - 1)) { + aligned_sizes[k] = (*rx_offset_sizes)[k]; + } else { + aligned_sizes[k] = raft::round_up_safe((*rx_offset_sizes)[k], offset_alignment); + } + } + std::exclusive_scan( + aligned_sizes.begin(), aligned_sizes.end(), (*rx_offset_displs).begin(), size_t{0}); + } + + rx_values = allocate_dataframe_buffer( + (*rx_value_displs).back() + (*rx_value_sizes).back(), handle.get_stream()); + if (graph_view.use_dcs()) { + if (uint32_key_output_offset) { + rx_offsets = rmm::device_uvector( + (*rx_offset_displs).back() + (*rx_offset_sizes).back(), handle.get_stream()); + } else { + rx_offsets = rmm::device_uvector( + (*rx_offset_displs).back() + (*rx_offset_sizes).back(), handle.get_stream()); + } + } + } + } + + device_group_start(minor_comm); + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto& values = edge_partition_values[j]; + + if (minor_comm_rank == static_cast(partition_idx)) { + device_gatherv(minor_comm, + get_dataframe_buffer_begin(values), + get_dataframe_buffer_begin(*rx_values), + values.size(), + *rx_value_sizes, + *rx_value_displs, + static_cast(partition_idx), + handle.get_stream()); + } else { + device_gatherv(minor_comm, + get_dataframe_buffer_begin(values), + dataframe_buffer_iterator_type_t{}, + values.size(), + std::vector{}, + std::vector{}, + static_cast(partition_idx), + handle.get_stream()); + } + } + device_group_end(minor_comm); + if (graph_view.use_dcs()) { + device_group_start(minor_comm); + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto& values = edge_partition_values[j]; + + auto const& offsets = 
(*edge_partition_deg1_hypersparse_output_offset_vectors)[j]; + if (offsets.index() == 0) { + if (minor_comm_rank == static_cast(partition_idx)) { + device_gatherv(minor_comm, + std::get<0>(offsets).data(), + std::get<0>(*rx_offsets).data(), + std::get<0>(offsets).size(), + *rx_offset_sizes, + *rx_offset_displs, + static_cast(partition_idx), + handle.get_stream()); + } else { + device_gatherv(minor_comm, + std::get<0>(offsets).data(), + static_cast(nullptr), + std::get<0>(offsets).size(), + std::vector{}, + std::vector{}, + static_cast(partition_idx), + handle.get_stream()); + } + } else { + assert(offsets.index() == 1); + if (minor_comm_rank == static_cast(partition_idx)) { + device_gatherv(minor_comm, + std::get<1>(offsets).data(), + std::get<1>(*rx_offsets).data(), + std::get<1>(offsets).size(), + *rx_offset_sizes, + *rx_offset_displs, + static_cast(partition_idx), + handle.get_stream()); + } else { + device_gatherv(minor_comm, + std::get<1>(offsets).data(), + static_cast(nullptr), + std::get<1>(offsets).size(), + std::vector{}, + std::vector{}, + static_cast(partition_idx), + handle.get_stream()); + } + } + } + device_group_end(minor_comm); + } + handle.sync_stream(); // this is required before edge_partition_values.clear(); + edge_partition_values.clear(); + if (loop_stream_pool_indices) { + handle.sync_stream_pool(*loop_stream_pool_indices); + } // to ensure that memory is freed + + if (rx_values && (size_dataframe_buffer(*rx_values) > 0)) { + auto j = static_cast(minor_comm_rank % num_concurrent_loops); + auto partition_idx = i + j; + + { // remove gaps introduced to enforce alignment + rmm::device_uvector bitmap( + packed_bool_size(size_dataframe_buffer(*rx_values)), handle.get_stream()); + thrust::fill( + handle.get_thrust_policy(), bitmap.begin(), bitmap.end(), packed_bool_empty_mask()); + rmm::device_uvector d_displs((*rx_value_displs).size(), handle.get_stream()); + rmm::device_uvector d_sizes((*rx_value_sizes).size(), handle.get_stream()); + 
raft::update_device(d_displs.data(), + (*rx_value_displs).data(), + (*rx_value_displs).size(), + handle.get_stream()); + raft::update_device(d_sizes.data(), + (*rx_value_sizes).data(), + (*rx_value_sizes).size(), + handle.get_stream()); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(static_cast(minor_comm_size - 1) * + value_alignment), + [bitmap = raft::device_span(bitmap.data(), bitmap.size()), + displs = raft::device_span(d_displs.data(), d_displs.size()), + sizes = raft::device_span(d_sizes.data(), d_sizes.size()), + alignment = value_alignment] __device__(size_t i) { + auto rank = static_cast(i / alignment); + auto first = displs[rank] + sizes[rank]; + auto last = displs[rank + 1]; + if ((i % alignment) < (last - first)) { + auto offset = first + (i % alignment); + cuda::atomic_ref word( + bitmap[packed_bool_offset(offset)]); + word.fetch_or(packed_bool_mask(offset), cuda::std::memory_order_relaxed); + } + }); + resize_dataframe_buffer( + *rx_values, + thrust::distance( + get_dataframe_buffer_begin(*rx_values), + thrust::remove_if(handle.get_thrust_policy(), + get_dataframe_buffer_begin(*rx_values), + get_dataframe_buffer_end(*rx_values), + thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type( + [bitmap = raft::device_span( + bitmap.data(), bitmap.size())] __device__(size_t i) { + return (bitmap[packed_bool_offset(i)] & + packed_bool_mask(i)) == packed_bool_mask(i); + })), + thrust::identity{})), + handle.get_stream()); + // skip shrink_to_fit() to cut execution time + std::exclusive_scan((*rx_value_sizes).begin(), + (*rx_value_sizes).end(), + (*rx_value_displs).begin(), + size_t{0}); // now gaps are removed + + if (rx_offsets) { + size_t num_offsets = ((*rx_offsets).index() == 0) + ? 
size_dataframe_buffer(std::get<0>(*rx_offsets)) + : size_dataframe_buffer(std::get<1>(*rx_offsets)); + bitmap.resize(packed_bool_size(num_offsets), handle.get_stream()); + thrust::fill( + handle.get_thrust_policy(), bitmap.begin(), bitmap.end(), packed_bool_empty_mask()); + d_displs.resize((*rx_offset_displs).size(), handle.get_stream()); + d_sizes.resize((*rx_offset_sizes).size(), handle.get_stream()); + raft::update_device(d_displs.data(), + (*rx_offset_displs).data(), + (*rx_offset_displs).size(), + handle.get_stream()); + raft::update_device(d_sizes.data(), + (*rx_offset_sizes).data(), + (*rx_offset_sizes).size(), + handle.get_stream()); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(static_cast(minor_comm_size - 1) * + offset_alignment), + [bitmap = raft::device_span(bitmap.data(), bitmap.size()), + displs = raft::device_span(d_displs.data(), d_displs.size()), + sizes = raft::device_span(d_sizes.data(), d_sizes.size()), + alignment = offset_alignment] __device__(size_t i) { + auto rank = static_cast(i / alignment); + auto first = displs[rank] + sizes[rank]; + auto last = displs[rank + 1]; + if ((i % alignment) < (last - first)) { + auto offset = first + (i % alignment); + cuda::atomic_ref word( + bitmap[packed_bool_offset(offset)]); + word.fetch_or(packed_bool_mask(offset), cuda::std::memory_order_relaxed); + } + }); + if ((*rx_offsets).index() == 0) { + resize_dataframe_buffer( + std::get<0>(*rx_offsets), + thrust::distance( + get_dataframe_buffer_begin(std::get<0>(*rx_offsets)), + thrust::remove_if(handle.get_thrust_policy(), + get_dataframe_buffer_begin(std::get<0>(*rx_offsets)), + get_dataframe_buffer_end(std::get<0>(*rx_offsets)), + thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type( + [bitmap = raft::device_span( + bitmap.data(), bitmap.size())] __device__(size_t i) { + return (bitmap[packed_bool_offset(i)] & + 
packed_bool_mask(i)) == packed_bool_mask(i); + })), + thrust::identity{})), + handle.get_stream()); + // skip shrink_to_fit() to cut execution time + } else { + resize_dataframe_buffer( + std::get<1>(*rx_offsets), + thrust::distance( + get_dataframe_buffer_begin(std::get<1>(*rx_offsets)), + thrust::remove_if(handle.get_thrust_policy(), + get_dataframe_buffer_begin(std::get<1>(*rx_offsets)), + get_dataframe_buffer_end(std::get<1>(*rx_offsets)), + thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type( + [bitmap = raft::device_span( + bitmap.data(), bitmap.size())] __device__(size_t i) { + return (bitmap[packed_bool_offset(i)] & + packed_bool_mask(i)) == packed_bool_mask(i); + })), + thrust::identity{})), + handle.get_stream()); + // skip shrink_to_fit() to cut execution time + } + std::exclusive_scan((*rx_offset_sizes).begin(), + (*rx_offset_sizes).end(), + (*rx_offset_displs).begin(), + size_t{0}); // now gaps are removed + } + } + + size_t output_range_size{}; + if constexpr (filter_input_key) { + output_range_size = local_key_list_sizes[partition_idx]; + } else { + auto const& segment_offsets = graph_view.local_vertex_partition_segment_offsets(); + output_range_size = + segment_offsets + ? 
*((*segment_offsets).rbegin() + 1) /* exclude the zero degree segment */ + : graph_view.local_vertex_partition_range_size(); + } + auto& selected_ranks = std::get<0>(edge_partition_selected_ranks_or_flags[j]); + if (selected_ranks.index() == 0) { + auto old_size = std::get<0>(selected_ranks).size(); + std::get<0>(selected_ranks).resize(output_range_size, handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + std::get<0>(selected_ranks).begin() + old_size, + std::get<0>(selected_ranks).end(), + static_cast(minor_comm_size)); + } else { + assert(selected_ranks.index() == 1); + auto old_size = std::get<1>(selected_ranks).size(); + std::get<1>(selected_ranks).resize(output_range_size, handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + std::get<1>(selected_ranks).begin() + old_size, + std::get<1>(selected_ranks).end(), + minor_comm_size); + } + if (rx_offsets) { + rmm::device_uvector lasts((*rx_offset_displs).size(), handle.get_stream()); + raft::update_device(lasts.data(), + (*rx_offset_displs).data() + 1, + (*rx_offset_displs).size() - 1, + handle.get_stream()); + auto num_elements = (*rx_offset_displs).back() + (*rx_offset_sizes).back(); + lasts.set_element_async(lasts.size() - 1, num_elements, handle.get_stream()); + handle.sync_stream(); // this is necessary before num_elements becomes out-of-scope + + if ((*rx_offsets).index() == 0) { + auto& offsets = std::get<0>(*rx_offsets); + if (selected_ranks.index() == 0) { + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(offsets.size()), + [offsets = raft::device_span(offsets.data(), offsets.size()), + lasts = raft::device_span(lasts.data(), lasts.size()), + selected_ranks = raft::device_span( + std::get<0>(selected_ranks).data(), + std::get<0>(selected_ranks).size())] __device__(auto i) { + auto minor_comm_rank = static_cast(thrust::distance( + lasts.begin(), + thrust::upper_bound(thrust::seq, lasts.begin(), 
lasts.end(), i))); + selected_ranks[offsets[i]] = minor_comm_rank; + }); + } else { + assert(selected_ranks.index() == 1); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(offsets.size()), + [offsets = raft::device_span(offsets.data(), offsets.size()), + lasts = raft::device_span(lasts.data(), lasts.size()), + selected_ranks = raft::device_span( + std::get<1>(selected_ranks).data(), + std::get<1>(selected_ranks).size())] __device__(auto i) { + auto minor_comm_rank = static_cast(thrust::distance( + lasts.begin(), + thrust::upper_bound(thrust::seq, lasts.begin(), lasts.end(), i))); + selected_ranks[offsets[i]] = minor_comm_rank; + }); + } + offsets.resize(0, handle.get_stream()); + offsets.shrink_to_fit(handle.get_stream()); + } else { + assert((*rx_offsets).index() == 1); + auto& offsets = std::get<1>(*rx_offsets); + if (selected_ranks.index() == 0) { + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(offsets.size()), + [offsets = raft::device_span(offsets.data(), offsets.size()), + lasts = raft::device_span(lasts.data(), lasts.size()), + selected_ranks = raft::device_span( + std::get<0>(selected_ranks).data(), + std::get<0>(selected_ranks).size())] __device__(auto i) { + auto minor_comm_rank = static_cast(thrust::distance( + lasts.begin(), + thrust::upper_bound(thrust::seq, lasts.begin(), lasts.end(), i))); + selected_ranks[offsets[i]] = minor_comm_rank; + }); + } else { + assert(selected_ranks.index() == 1); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(offsets.size()), + [offsets = raft::device_span(offsets.data(), offsets.size()), + lasts = raft::device_span(lasts.data(), lasts.size()), + selected_ranks = raft::device_span( + std::get<1>(selected_ranks).data(), + std::get<1>(selected_ranks).size())] __device__(auto i) { + auto 
minor_comm_rank = static_cast(thrust::distance( + lasts.begin(), + thrust::upper_bound(thrust::seq, lasts.begin(), lasts.end(), i))); + selected_ranks[offsets[i]] = minor_comm_rank; + }); + } + offsets.resize(0, handle.get_stream()); + offsets.shrink_to_fit(handle.get_stream()); + } + } + + size_t num_positions = (selected_ranks.index() == 0) ? std::get<0>(selected_ranks).size() + : std::get<1>(selected_ranks).size(); + if (num_positions <= static_cast(std::numeric_limits::max())) { + rmm::device_uvector rx_positions(num_positions, handle.get_stream()); + thrust::sequence( + handle.get_thrust_policy(), rx_positions.begin(), rx_positions.end(), uint32_t{0}); + if (selected_ranks.index() == 0) { + thrust::stable_sort_by_key(handle.get_thrust_policy(), + std::get<0>(selected_ranks).begin(), + std::get<0>(selected_ranks).end(), + rx_positions.begin()); + } else { + assert(selected_ranks.index() == 1); + thrust::stable_sort_by_key(handle.get_thrust_policy(), + std::get<1>(selected_ranks).begin(), + std::get<1>(selected_ranks).end(), + rx_positions.begin()); + } + // selected_ranks[] == minor_comm_size if no GPU in minor_comm has a non-init value + rx_positions.resize((*rx_value_displs).back() + (*rx_value_sizes).back(), + handle.get_stream()); + thrust::scatter(handle.get_thrust_policy(), + get_dataframe_buffer_begin(*rx_values), + get_dataframe_buffer_end(*rx_values), + rx_positions.begin(), + tmp_vertex_value_output_first); + } else { + rmm::device_uvector rx_positions(num_positions, handle.get_stream()); + thrust::sequence( + handle.get_thrust_policy(), rx_positions.begin(), rx_positions.end(), size_t{0}); + if (selected_ranks.index() == 0) { + thrust::stable_sort_by_key(handle.get_thrust_policy(), + std::get<0>(selected_ranks).begin(), + std::get<0>(selected_ranks).end(), + rx_positions.begin()); + } else { + assert(selected_ranks.index() == 1); + thrust::stable_sort_by_key(handle.get_thrust_policy(), + std::get<1>(selected_ranks).begin(), + 
std::get<1>(selected_ranks).end(), + rx_positions.begin()); + } + // selected_ranks[] == minor_comm_size if no GPU in minor_comm has a non-init value + rx_positions.resize((*rx_value_displs).back() + (*rx_value_sizes).back(), + handle.get_stream()); + thrust::scatter(handle.get_thrust_policy(), + get_dataframe_buffer_begin(*rx_values), + get_dataframe_buffer_end(*rx_values), + rx_positions.begin(), + tmp_vertex_value_output_first); + } + } + handle.sync_stream(); + } else { + device_group_start(minor_comm); + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + + device_reduce(minor_comm, + get_dataframe_buffer_begin(edge_partition_major_output_buffers[j]), + tmp_vertex_value_output_first, + size_dataframe_buffer(edge_partition_major_output_buffers[j]), + ReduceOp::compatible_raft_comms_op, + static_cast(partition_idx), + handle.get_stream()); + } + device_group_end(minor_comm); + if (loop_stream_pool_indices) { handle.sync_stream(); } + } + } + } + + // 10. communication + + if constexpr (GraphViewType::is_multi_gpu && !update_major) { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_rank = major_comm.get_rank(); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + auto const minor_comm_size = minor_comm.get_size(); + + auto view = minor_tmp_buffer->view(); + if (view.keys()) { // applying the initial value is deferred to here + vertex_t max_vertex_partition_size{0}; + for (int i = 0; i < major_comm_size; ++i) { + auto this_segment_vertex_partition_id = + compute_local_edge_partition_minor_range_vertex_partition_id_t{ + major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); + max_vertex_partition_size = + std::max(max_vertex_partition_size, + 
graph_view.vertex_partition_range_size(this_segment_vertex_partition_id)); + } + auto tx_buffer = allocate_dataframe_buffer(max_vertex_partition_size, handle.get_stream()); + auto tx_buffer_first = get_dataframe_buffer_begin(tx_buffer); + std::optional> minor_key_offsets{}; + if constexpr (GraphViewType::is_storage_transposed) { + minor_key_offsets = graph_view.local_sorted_unique_edge_src_vertex_partition_offsets(); + } else { + minor_key_offsets = graph_view.local_sorted_unique_edge_dst_vertex_partition_offsets(); + } + for (int i = 0; i < major_comm_size; ++i) { + auto minor_init = (major_comm_rank == i) ? init : ReduceOp::identity_element; + auto this_segment_vertex_partition_id = + compute_local_edge_partition_minor_range_vertex_partition_id_t{ + major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); + thrust::fill(handle.get_thrust_policy(), + tx_buffer_first, + tx_buffer_first + + graph_view.vertex_partition_range_size(this_segment_vertex_partition_id), + minor_init); + auto value_first = thrust::make_transform_iterator( + view.value_first(), + cuda::proclaim_return_type( + [reduce_op, minor_init] __device__(auto val) { return reduce_op(val, minor_init); })); + thrust::scatter(handle.get_thrust_policy(), + value_first + (*minor_key_offsets)[i], + value_first + (*minor_key_offsets)[i + 1], + thrust::make_transform_iterator( + (*(view.keys())).begin() + (*minor_key_offsets)[i], + cuda::proclaim_return_type( + [key_first = graph_view.vertex_partition_range_first( + this_segment_vertex_partition_id)] __device__(auto key) { + return key - key_first; + })), + tx_buffer_first); + device_reduce(major_comm, + tx_buffer_first, + tmp_vertex_value_output_first, + static_cast( + graph_view.vertex_partition_range_size(this_segment_vertex_partition_id)), + ReduceOp::compatible_raft_comms_op, + i, + handle.get_stream()); + } + } else { + auto first_segment_vertex_partition_id = + compute_local_edge_partition_minor_range_vertex_partition_id_t{ + 
major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(0); + vertex_t minor_range_first = + graph_view.vertex_partition_range_first(first_segment_vertex_partition_id); + for (int i = 0; i < major_comm_size; ++i) { + auto this_segment_vertex_partition_id = + compute_local_edge_partition_minor_range_vertex_partition_id_t{ + major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); + auto offset = graph_view.vertex_partition_range_first(this_segment_vertex_partition_id) - + minor_range_first; + device_reduce(major_comm, + view.value_first() + offset, + tmp_vertex_value_output_first, + static_cast( + graph_view.vertex_partition_range_size(this_segment_vertex_partition_id)), + ReduceOp::compatible_raft_comms_op, + i, + handle.get_stream()); + } + } + } +} + +} // namespace detail + +} // namespace cugraph diff --git a/cpp/src/prims/detail/prim_functors.cuh b/cpp/src/prims/detail/prim_functors.cuh index f426cd993ea..a166f37906a 100644 --- a/cpp/src/prims/detail/prim_functors.cuh +++ b/cpp/src/prims/detail/prim_functors.cuh @@ -21,6 +21,23 @@ namespace cugraph { namespace detail { +template +struct const_true_e_op_t { + __device__ auto operator()(std::conditional_t key_or_src, + std::conditional_t key_or_dst, + src_value_t, + dst_value_t, + e_value_t) const + { + return true; + } +}; + template +struct call_const_true_e_op_t { + __device__ auto operator()(edge_t i) const { return true; } +}; + template +#include + +namespace cugraph { + +namespace detail { + +template +__host__ __device__ priority_t +rank_to_priority(int rank, + int root, + int subgroup_size /* faster interconnect within a subgroup */, + int comm_size, + vertex_t offset /* to evenly distribute traffic */) +{ + static_assert(sizeof(priority_t) == 1 || sizeof(priority_t) == 2 || sizeof(priority_t) == 4); + using cast_t = std::conditional_t< + sizeof(priority_t) == 1, + int16_t, + std::conditional_t>; // to prevent overflow + + if (rank == root) { + return priority_t{0}; + } else 
if (rank / subgroup_size == + root / subgroup_size) { // intra-subgroup communication is sufficient (priorities in + // [1, subgroup_size) + auto rank_dist = + static_cast(((static_cast(rank) + subgroup_size) - root) % subgroup_size); + int modulo = subgroup_size - 1; + return static_cast(1 + (static_cast(rank_dist - 1) + (offset % modulo)) % + modulo); + } else { // inter-subgroup communication is necessary (priorities in [subgroup_size, comm_size) + auto subgroup_dist = + static_cast(((static_cast(rank / subgroup_size) + (comm_size / subgroup_size)) - + (root / subgroup_size)) % + (comm_size / subgroup_size)); + auto intra_subgroup_rank_dist = static_cast( + ((static_cast(rank % subgroup_size) + subgroup_size) - (root % subgroup_size)) % + subgroup_size); + auto rank_dist = subgroup_dist * subgroup_size + intra_subgroup_rank_dist; + int modulo = comm_size - subgroup_size; + return static_cast( + subgroup_size + + (static_cast(rank_dist - subgroup_size) + (offset % modulo)) % modulo); + } +} + +template +__host__ __device__ int priority_to_rank( + priority_t priority, + int root, + int subgroup_size /* faster interconnect within a subgroup */, + int comm_size, + vertex_t offset /* to evenly distribute traffict */) +{ + static_assert(sizeof(priority_t) == 1 || sizeof(priority_t) == 2 || sizeof(priority_t) == 4); + using cast_t = std::conditional_t< + sizeof(priority_t) == 1, + int16_t, + std::conditional_t>; // to prevent overflow + + if (priority == priority_t{0}) { + return root; + } else if (priority < static_cast(subgroup_size)) { + int modulo = subgroup_size - 1; + auto rank_dist = static_cast( + 1 + ((static_cast(priority - 1) + modulo) - (offset % modulo)) % modulo); + return static_cast((root - (root % subgroup_size)) + + ((static_cast(root) + rank_dist) % subgroup_size)); + } else { + int modulo = comm_size - subgroup_size; + auto rank_dist = static_cast( + subgroup_size + + ((static_cast(priority) - subgroup_size) + (modulo - (offset % modulo))) % 
modulo); + auto subgroup_dist = rank_dist / subgroup_size; + auto intra_subgroup_rank_dist = rank_dist % subgroup_size; + return static_cast( + ((static_cast((root / subgroup_size) * subgroup_size) + + subgroup_dist * subgroup_size) + + (static_cast(root) + intra_subgroup_rank_dist) % subgroup_size) % + comm_size); + } +} + +} // namespace detail + +} // namespace cugraph diff --git a/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh b/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh index 5ebc3dc8aea..dd0da77851b 100644 --- a/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh +++ b/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh @@ -597,8 +597,6 @@ rmm::device_uvector compute_uniform_sampling_index_without_replacement( raft::random::RngState& rng_state, size_t K) { - assert(cugraph::invalid_edge_id_v == cugraph::ops::graph::INVALID_ID); - edge_t mid_partition_degree_range_last = static_cast(K * 10); // tuning parameter assert(mid_partition_degree_range_last > K); size_t high_partition_oversampling_K = K * 2; // tuning parameter @@ -1567,10 +1565,7 @@ uniform_sample_and_compute_local_nbr_indices( size_t K, bool with_replacement) { - using edge_t = typename GraphViewType::edge_type; - - assert(cugraph::invalid_edge_id_v == cugraph::ops::graph::INVALID_ID); - + using edge_t = typename GraphViewType::edge_type; using vertex_t = typename GraphViewType::vertex_type; using key_t = typename thrust::iterator_traits::value_type; diff --git a/cpp/src/prims/extract_transform_e.cuh b/cpp/src/prims/extract_transform_e.cuh index d51e03628e1..5741c98d90e 100644 --- a/cpp/src/prims/extract_transform_e.cuh +++ b/cpp/src/prims/extract_transform_e.cuh @@ -116,8 +116,8 @@ extract_transform_e(raft::handle_t const& handle, thrust::make_counting_iterator(graph_view.local_vertex_partition_range_last())); auto value_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); - std::tie(std::ignore, value_buffer) = detail:: - 
extract_transform_v_frontier_e( + std::tie(std::ignore, value_buffer) = + detail::extract_transform_v_frontier_e( handle, graph_view, frontier, diff --git a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh index 7ad033b93c2..ba227b263bc 100644 --- a/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh +++ b/cpp/src/prims/extract_transform_v_frontier_outgoing_e.cuh @@ -64,13 +64,13 @@ namespace cugraph { * @return Dataframe buffer object storing extracted and accumulated valid @p e_op return values. */ template decltype(allocate_dataframe_buffer< - typename detail::edge_op_result_type(size_t{0}, handle.get_stream()); std::tie(std::ignore, value_buffer) = - detail::extract_transform_v_frontier_e(handle, - graph_view, - frontier, - edge_src_value_input, - edge_dst_value_input, - edge_value_input, - e_op, - do_expensive_check); + detail::extract_transform_v_frontier_e(handle, + graph_view, + frontier, + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op, + do_expensive_check); return value_buffer; } diff --git a/cpp/src/prims/fill_edge_src_dst_property.cuh b/cpp/src/prims/fill_edge_src_dst_property.cuh index 58dbf7e74a0..a36cf332eb4 100644 --- a/cpp/src/prims/fill_edge_src_dst_property.cuh +++ b/cpp/src/prims/fill_edge_src_dst_property.cuh @@ -15,6 +15,8 @@ */ #pragma once +#include "prims/vertex_frontier.cuh" + #include #include #include @@ -129,8 +131,8 @@ template void fill_edge_major_property(raft::handle_t const& handle, GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, + VertexIterator sorted_unique_vertex_first, + VertexIterator sorted_unique_vertex_last, EdgeMajorPropertyOutputWrapper edge_major_property_output, T input) { @@ -153,12 +155,12 @@ void fill_edge_major_property(raft::handle_t const& handle, auto const minor_comm_rank = minor_comm.get_rank(); auto const minor_comm_size = minor_comm.get_size(); - auto rx_counts = 
- host_scalar_allgather(minor_comm, - static_cast(thrust::distance(vertex_first, vertex_last)), - handle.get_stream()); - auto max_rx_size = - std::reduce(rx_counts.begin(), rx_counts.end(), size_t{0}, [](auto lhs, auto rhs) { + auto local_v_list_sizes = host_scalar_allgather( + minor_comm, + static_cast(thrust::distance(sorted_unique_vertex_first, sorted_unique_vertex_last)), + handle.get_stream()); + auto max_rx_size = std::reduce( + local_v_list_sizes.begin(), local_v_list_sizes.end(), size_t{0}, [](auto lhs, auto rhs) { return std::max(lhs, rhs); }); rmm::device_uvector rx_vertices(max_rx_size, handle.get_stream()); @@ -169,14 +171,18 @@ void fill_edge_major_property(raft::handle_t const& handle, edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); - device_bcast( - minor_comm, vertex_first, rx_vertices.begin(), rx_counts[i], i, handle.get_stream()); + device_bcast(minor_comm, + sorted_unique_vertex_first, + rx_vertices.begin(), + local_v_list_sizes[i], + i, + handle.get_stream()); if (edge_partition_keys) { thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(rx_counts[i]), + thrust::make_counting_iterator(local_v_list_sizes[i]), [rx_vertex_first = rx_vertices.begin(), input, edge_partition_key_first = ((*edge_partition_keys)[i]).begin(), @@ -199,7 +205,7 @@ void fill_edge_major_property(raft::handle_t const& handle, thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(vertex_t{0}), - thrust::make_counting_iterator(static_cast(rx_counts[i])), + thrust::make_counting_iterator(static_cast(local_v_list_sizes[i])), [edge_partition, rx_vertex_first = rx_vertices.begin(), input, @@ -219,7 +225,7 @@ void fill_edge_major_property(raft::handle_t const& handle, // directly scatters from the internal buffer) thrust::scatter(handle.get_thrust_policy(), val_first, - val_first + rx_counts[i], + val_first + local_v_list_sizes[i], map_first, 
edge_partition_value_firsts[i]); } @@ -232,17 +238,18 @@ void fill_edge_major_property(raft::handle_t const& handle, assert(edge_partition_value_firsts.size() == size_t{1}); if constexpr (contains_packed_bool_element) { thrust::for_each(handle.get_thrust_policy(), - vertex_first, - vertex_last, + sorted_unique_vertex_first, + sorted_unique_vertex_last, [input, output_value_first = edge_partition_value_firsts[0]] __device__( auto v) { packed_bool_atomic_set(output_value_first, v, input); }); } else { auto val_first = thrust::make_constant_iterator(input); - thrust::scatter(handle.get_thrust_policy(), - val_first, - val_first + thrust::distance(vertex_first, vertex_last), - vertex_first, - edge_partition_value_firsts[0]); + thrust::scatter( + handle.get_thrust_policy(), + val_first, + val_first + thrust::distance(sorted_unique_vertex_first, sorted_unique_vertex_last), + sorted_unique_vertex_first, + edge_partition_value_firsts[0]); } } } @@ -286,8 +293,8 @@ template void fill_edge_minor_property(raft::handle_t const& handle, GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, + VertexIterator sorted_unique_vertex_first, + VertexIterator sorted_unique_vertex_last, EdgeMinorPropertyOutputWrapper edge_minor_property_output, T input) { @@ -300,22 +307,269 @@ void fill_edge_minor_property(raft::handle_t const& handle, using edge_t = typename GraphViewType::edge_type; auto edge_partition_value_first = edge_minor_property_output.value_first(); + vertex_t minor_range_first{}; + if constexpr (GraphViewType::is_storage_transposed) { + minor_range_first = graph_view.local_edge_partition_src_range_first(); + } else { + minor_range_first = graph_view.local_edge_partition_dst_range_first(); + } + if constexpr (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); 
auto const major_comm_rank = major_comm.get_rank(); auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + auto const minor_comm_size = minor_comm.get_size(); - auto rx_counts = - host_scalar_allgather(major_comm, - static_cast(thrust::distance(vertex_first, vertex_last)), - handle.get_stream()); - auto max_rx_size = - std::reduce(rx_counts.begin(), rx_counts.end(), size_t{0}, [](auto lhs, auto rhs) { - return std::max(lhs, rhs); - }); - rmm::device_uvector rx_vertices(max_rx_size, handle.get_stream()); + constexpr size_t packed_bool_word_bcast_alignment = + 128 / + sizeof( + uint32_t); // 128B cache line alignment (unaligned ncclBroadcast operations are slower) + + std::vector max_tmp_buffer_sizes{}; + std::vector local_v_list_sizes{}; + std::vector local_v_list_range_firsts{}; + std::vector local_v_list_range_lasts{}; + { + auto v_list_size = static_cast( + thrust::distance(sorted_unique_vertex_first, sorted_unique_vertex_last)); + rmm::device_uvector d_aggregate_tmps(major_comm_size * size_t{4}, + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + d_aggregate_tmps.begin() + major_comm_rank * size_t{4}, + d_aggregate_tmps.begin() + (major_comm_rank + 1) * size_t{4}, + [max_tmp_buffer_size = static_cast( + static_cast(handle.get_device_properties().totalGlobalMem) * 0.05), + sorted_unique_vertex_first, + v_list_size, + vertex_partition_range_first = + graph_view.local_vertex_partition_range_first()] __device__(size_t i) { + if (i == 0) { + return max_tmp_buffer_size; + } else if (i == 1) { + return static_cast(v_list_size); + } else if (i == 2) { + vertex_t first{}; + if (v_list_size > 0) { + first = *sorted_unique_vertex_first; + } else { + first = vertex_partition_range_first; + } + assert(static_cast(static_cast(first)) == first); + return static_cast(first); + } else { + vertex_t last{}; + if 
(v_list_size > 0) { + last = *(sorted_unique_vertex_first + (v_list_size - 1)) + 1; + } else { + last = vertex_partition_range_first; + } + assert(static_cast(static_cast(last)) == last); + return static_cast(last); + } + }); + + if (major_comm_size > 1) { // allgather max_tmp_buffer_size, v_list_size, v_list_range_first + // (inclusive), v_list_range_last (exclusive) + device_allgather(major_comm, + d_aggregate_tmps.data() + major_comm_rank * size_t{4}, + d_aggregate_tmps.data(), + size_t{4}, + handle.get_stream()); + } + + std::vector h_aggregate_tmps(d_aggregate_tmps.size()); + raft::update_host(h_aggregate_tmps.data(), + d_aggregate_tmps.data(), + d_aggregate_tmps.size(), + handle.get_stream()); + handle.sync_stream(); + max_tmp_buffer_sizes = std::vector(major_comm_size); + local_v_list_sizes = std::vector(major_comm_size); + local_v_list_range_firsts = std::vector(major_comm_size); + local_v_list_range_lasts = std::vector(major_comm_size); + for (int i = 0; i < major_comm_size; ++i) { + max_tmp_buffer_sizes[i] = h_aggregate_tmps[i * size_t{4}]; + local_v_list_sizes[i] = static_cast(h_aggregate_tmps[i * size_t{4} + 1]); + local_v_list_range_firsts[i] = static_cast(h_aggregate_tmps[i * size_t{4} + 2]); + local_v_list_range_lasts[i] = static_cast(h_aggregate_tmps[i * size_t{4} + 3]); + } + } + + auto edge_partition_keys = edge_minor_property_output.keys(); + + std::optional> v_list_bitmap{std::nullopt}; + std::optional> compressed_v_list{std::nullopt}; + if (major_comm_size > 1) { + bool v_compressible{false}; + if constexpr (sizeof(vertex_t) > sizeof(uint32_t)) { + vertex_t local_v_list_max_range_size{0}; + for (int i = 0; i < major_comm_size; ++i) { + auto range_size = local_v_list_range_lasts[i] - local_v_list_range_firsts[i]; + local_v_list_max_range_size = std::max(range_size, local_v_list_max_range_size); + } + if (local_v_list_max_range_size <= + std::numeric_limits::max()) { // broadcast 32bit offset values instead of 64 + // bit vertex IDs + 
v_compressible = true; + } + } + + double avg_fill_ratio{0.0}; + for (int i = 0; i < major_comm_size; ++i) { + auto num_keys = static_cast(local_v_list_sizes[i]); + auto range_size = local_v_list_range_lasts[i] - local_v_list_range_firsts[i]; + avg_fill_ratio += (range_size > 0) + ? (static_cast(num_keys) / static_cast(range_size)) + : double{0.0}; + } + avg_fill_ratio /= static_cast(major_comm_size); + double threshold_ratio = + 1.0 / static_cast((v_compressible ? sizeof(uint32_t) : sizeof(vertex_t)) * 8); + auto avg_v_list_size = std::reduce(local_v_list_sizes.begin(), local_v_list_sizes.end()) / + static_cast(major_comm_size); + + if ((avg_fill_ratio > threshold_ratio) && + (static_cast(avg_v_list_size) > packed_bool_word_bcast_alignment)) { + if (is_packed_bool() && + !edge_partition_keys) { // directly update edge_minor_property_output (with special + // care for unaligned boundaries) + rmm::device_uvector boundary_words( + packed_bool_word_bcast_alignment, + handle.get_stream()); // for unaligned boundaries + auto leading_boundary_words = + (packed_bool_word_bcast_alignment - + packed_bool_offset(local_v_list_range_firsts[major_comm_rank] - minor_range_first) % + packed_bool_word_bcast_alignment) % + packed_bool_word_bcast_alignment; + if ((leading_boundary_words == 0) && + (packed_bool_offset(local_v_list_range_firsts[major_comm_rank] - minor_range_first) == + packed_bool_offset(graph_view.local_vertex_partition_range_first() - + minor_range_first)) && + (((local_v_list_range_firsts[major_comm_rank] - minor_range_first) % + packed_bools_per_word()) != + 0)) { // there are unaligned bits (fewer than packed_bools_per_word()) in the vertex + // partition boundary + leading_boundary_words = packed_bool_word_bcast_alignment; + } + thrust::fill(handle.get_thrust_policy(), + boundary_words.begin(), + boundary_words.begin() + leading_boundary_words, + packed_bool_empty_mask()); + if (local_v_list_range_firsts[major_comm_rank] < + 
local_v_list_range_lasts[major_comm_rank]) { + auto word_offset_first = + packed_bool_offset(local_v_list_range_firsts[major_comm_rank] - minor_range_first); + auto word_offset_last = + packed_bool_offset((local_v_list_range_lasts[major_comm_rank] - 1) - + minor_range_first) + + 1; + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(word_offset_first), + thrust::make_counting_iterator(word_offset_last), + [sorted_unique_vertex_first, + sorted_unique_vertex_last, + input, + minor_range_first, + leading_boundary_words, + word_offset_first, + vertex_partition_range_last = graph_view.local_vertex_partition_range_last(), + output_value_first = edge_partition_value_first, + boundary_words = raft::device_span( + boundary_words.data(), boundary_words.size())] __device__(auto i) { + auto& word = ((i - word_offset_first) < leading_boundary_words) + ? boundary_words[i - word_offset_first] + : *(output_value_first + i); + auto word_v_first = + minor_range_first + static_cast(i * packed_bools_per_word()); + auto word_v_last = + ((vertex_partition_range_last - word_v_first) <= packed_bools_per_word()) + ? 
vertex_partition_range_last + : (word_v_first + static_cast(packed_bools_per_word())); + auto it = thrust::lower_bound( + thrust::seq, sorted_unique_vertex_first, sorted_unique_vertex_last, word_v_first); + while ((it != sorted_unique_vertex_last) && (*it < word_v_last)) { + auto v_offset = *it - minor_range_first; + if (input) { + word |= packed_bool_mask(v_offset); + } else { + word &= ~packed_bool_mask(v_offset); + } + ++it; + } + }); + } + rmm::device_uvector aggregate_boundary_words( + major_comm_size * packed_bool_word_bcast_alignment, handle.get_stream()); + device_allgather(major_comm, + boundary_words.data(), + aggregate_boundary_words.data(), + packed_bool_word_bcast_alignment, + handle.get_stream()); + v_list_bitmap = std::move(aggregate_boundary_words); + } else { + v_list_bitmap = + compute_vertex_list_bitmap_info(sorted_unique_vertex_first, + sorted_unique_vertex_last, + local_v_list_range_firsts[major_comm_rank], + local_v_list_range_lasts[major_comm_rank], + handle.get_stream()); + } + } else if (v_compressible) { + rmm::device_uvector tmps(local_v_list_sizes[major_comm_rank], + handle.get_stream()); + thrust::transform(handle.get_thrust_policy(), + sorted_unique_vertex_first, + sorted_unique_vertex_last, + tmps.begin(), + cuda::proclaim_return_type( + [range_first = local_v_list_range_firsts[major_comm_rank]] __device__( + auto v) { return static_cast(v - range_first); })); + compressed_v_list = std::move(tmps); + } + } + + std::optional> stream_pool_indices{std::nullopt}; + size_t num_concurrent_bcasts{1}; + { + size_t tmp_buffer_size_per_loop{}; + for (int i = 0; i < major_comm_size; ++i) { + if (is_packed_bool() && + !edge_partition_keys && v_list_bitmap) { + tmp_buffer_size_per_loop += 0; + } else if (v_list_bitmap) { + tmp_buffer_size_per_loop += + packed_bool_size(local_v_list_range_lasts[i] - local_v_list_range_firsts[i]) * + sizeof(uint32_t) + + static_cast(local_v_list_sizes[i]) * sizeof(vertex_t); + } else { + tmp_buffer_size_per_loop += 
static_cast(local_v_list_sizes[i]) * sizeof(vertex_t); + } + } + tmp_buffer_size_per_loop /= major_comm_size; + size_t max_streams = + static_cast(major_comm_size); // to allow setting num_concurrent_bcasts above + // hnadle.get_stream_pool_size() + stream_pool_indices = init_stream_pool_indices( + std::reduce(max_tmp_buffer_sizes.begin(), max_tmp_buffer_sizes.end()) / + static_cast(major_comm_size), + tmp_buffer_size_per_loop, + major_comm_size, + 1, + max_streams); + num_concurrent_bcasts = (*stream_pool_indices).size(); + if ((*stream_pool_indices).size() > handle.get_stream_pool_size()) { + (*stream_pool_indices).resize(handle.get_stream_pool_size()); + } + if ((*stream_pool_indices).size() <= 1) { stream_pool_indices = std::nullopt; } + } std::optional> key_offsets{}; if constexpr (GraphViewType::is_storage_transposed) { @@ -324,88 +578,417 @@ void fill_edge_minor_property(raft::handle_t const& handle, key_offsets = graph_view.local_sorted_unique_edge_dst_vertex_partition_offsets(); } - auto edge_partition = - edge_partition_device_view_t( - graph_view.local_edge_partition_view(size_t{0})); - auto edge_partition_keys = edge_minor_property_output.keys(); - for (int i = 0; i < major_comm_size; ++i) { - // FIXME: these broadcast operations can be placed between ncclGroupStart() and - // ncclGroupEnd() - device_bcast( - major_comm, vertex_first, rx_vertices.begin(), rx_counts[i], i, handle.get_stream()); + for (size_t i = 0; i < static_cast(major_comm_size); i += num_concurrent_bcasts) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto sub0 = std::chrono::steady_clock::now(); + auto loop_count = std::min(num_concurrent_bcasts, static_cast(major_comm_size) - i); + + if (is_packed_bool() && + !edge_partition_keys && v_list_bitmap) { + std::vector leading_boundary_word_counts(loop_count); + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto leading_boundary_words = + (packed_bool_word_bcast_alignment - + 
packed_bool_offset(local_v_list_range_firsts[partition_idx] - minor_range_first) % + packed_bool_word_bcast_alignment) % + packed_bool_word_bcast_alignment; + auto vertex_partition_id = + partition_manager::compute_vertex_partition_id_from_graph_subcomm_ranks( + major_comm_size, minor_comm_size, partition_idx, minor_comm_rank); + if ((leading_boundary_words == 0) && + (packed_bool_offset(local_v_list_range_firsts[partition_idx] - minor_range_first) == + packed_bool_offset(graph_view.vertex_partition_range_first(vertex_partition_id) - + minor_range_first)) && + (((local_v_list_range_firsts[partition_idx] - minor_range_first) % + packed_bools_per_word()) != 0)) { + leading_boundary_words = packed_bool_word_bcast_alignment; + } + leading_boundary_word_counts[j] = leading_boundary_words; + } + device_group_start(major_comm); + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + size_t bcast_size{0}; + vertex_t packed_bool_offset_first{0}; + if (local_v_list_range_firsts[partition_idx] < local_v_list_range_lasts[partition_idx]) { + auto leading_boundary_words = leading_boundary_word_counts[j]; + packed_bool_offset_first = + packed_bool_offset(local_v_list_range_firsts[partition_idx] - minor_range_first) + + static_cast(leading_boundary_words); + auto packed_bool_offset_last = + packed_bool_offset(local_v_list_range_lasts[partition_idx] - 1 - minor_range_first); + if (packed_bool_offset_first <= packed_bool_offset_last) { + bcast_size = (packed_bool_offset_last - packed_bool_offset_first) + 1; + } + } + + device_bcast(major_comm, + edge_partition_value_first + packed_bool_offset_first, + edge_partition_value_first + packed_bool_offset_first, + bcast_size, + static_cast(partition_idx), + handle.get_stream()); + } + device_group_end(major_comm); + + rmm::device_uvector d_leading_boundary_word_counts( + leading_boundary_word_counts.size(), handle.get_stream()); + raft::update_device(d_leading_boundary_word_counts.data(), + 
leading_boundary_word_counts.data(), + leading_boundary_word_counts.size(), + handle.get_stream()); + + rmm::device_uvector d_local_v_list_range_firsts(loop_count, handle.get_stream()); + raft::update_device(d_local_v_list_range_firsts.data(), + local_v_list_range_firsts.data() + i, + loop_count, + handle.get_stream()); - if (edge_partition_keys) { thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(rx_counts[i]), - [rx_vertex_first = rx_vertices.begin(), - input, - subrange_key_first = (*edge_partition_keys).begin() + (*key_offsets)[i], - subrange_key_last = (*edge_partition_keys).begin() + (*key_offsets)[i + 1], - edge_partition_value_first = edge_partition_value_first, - subrange_start_offset = (*key_offsets)[i]] __device__(auto i) { - auto minor = *(rx_vertex_first + i); - auto it = - thrust::lower_bound(thrust::seq, subrange_key_first, subrange_key_last, minor); - if ((it != subrange_key_last) && (*it == minor)) { - auto subrange_offset = thrust::distance(subrange_key_first, it); - if constexpr (contains_packed_bool_element) { - fill_scalar_or_thrust_tuple( - edge_partition_value_first, subrange_start_offset + subrange_offset, input); - } else { - *(edge_partition_value_first + subrange_start_offset + subrange_offset) = input; + thrust::make_counting_iterator(loop_count * packed_bool_word_bcast_alignment), + [input, + minor_range_first, + leading_boundary_word_counts = raft::device_span( + d_leading_boundary_word_counts.data(), d_leading_boundary_word_counts.size()), + local_v_list_range_firsts = raft::device_span( + d_local_v_list_range_firsts.data(), d_local_v_list_range_firsts.size()), + aggregate_boundary_words = raft::device_span( + (*v_list_bitmap).data() + i * packed_bool_word_bcast_alignment, + loop_count * packed_bool_word_bcast_alignment), + output_value_first = edge_partition_value_first] __device__(size_t i) { + auto j = i / packed_bool_word_bcast_alignment; + auto 
leading_boundary_words = leading_boundary_word_counts[j]; + if ((i % packed_bool_word_bcast_alignment) < leading_boundary_words) { + auto boundary_word = aggregate_boundary_words[i]; + if (boundary_word != packed_bool_empty_mask()) { + auto word_offset = + packed_bool_offset(local_v_list_range_firsts[j] - minor_range_first) + + (i % packed_bool_word_bcast_alignment); + cuda::atomic_ref word( + *(output_value_first + word_offset)); + if (input) { + word.fetch_or(aggregate_boundary_words[i], cuda::std::memory_order_relaxed); + } else { + word.fetch_and(~aggregate_boundary_words[i], cuda::std::memory_order_relaxed); + } } } }); } else { - if constexpr (contains_packed_bool_element) { - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(vertex_t{0}), - thrust::make_counting_iterator(static_cast(rx_counts[i])), - [edge_partition, - rx_vertex_first = rx_vertices.begin(), - input, - output_value_first = edge_partition_value_first] __device__(auto i) { - auto rx_vertex = *(rx_vertex_first + i); - auto minor_offset = - edge_partition.minor_offset_from_minor_nocheck(rx_vertex); - fill_scalar_or_thrust_tuple(output_value_first, minor_offset, input); - }); - } else { - auto map_first = thrust::make_transform_iterator( - rx_vertices.begin(), - cuda::proclaim_return_type([edge_partition] __device__(auto v) { - return edge_partition.minor_offset_from_minor_nocheck(v); - })); - auto val_first = thrust::make_constant_iterator(input); - // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and - // directly scatters from the internal buffer) - thrust::scatter(handle.get_thrust_policy(), - val_first, - val_first + rx_counts[i], - map_first, - edge_partition_value_first); + std::vector, rmm::device_uvector>> + edge_partition_v_buffers{}; + edge_partition_v_buffers.reserve(loop_count); + rmm::device_uvector dummy_counters(loop_count, handle.get_stream()); + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; 
+ + std::variant, rmm::device_uvector> v_buffer = + rmm::device_uvector(0, handle.get_stream()); + if (v_list_bitmap) { + v_buffer = rmm::device_uvector( + packed_bool_size(local_v_list_range_lasts[partition_idx] - + local_v_list_range_firsts[partition_idx]), + handle.get_stream()); + } else if (compressed_v_list) { + v_buffer = + rmm::device_uvector(local_v_list_sizes[partition_idx], handle.get_stream()); + } else { + std::get<0>(v_buffer).resize(local_v_list_sizes[partition_idx], handle.get_stream()); + } + edge_partition_v_buffers.push_back(std::move(v_buffer)); + } + + device_group_start(major_comm); + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + + auto& v_buffer = edge_partition_v_buffers[j]; + if (v_list_bitmap) { + device_bcast(major_comm, + (*v_list_bitmap).data(), + std::get<1>(v_buffer).data(), + std::get<1>(v_buffer).size(), + static_cast(partition_idx), + handle.get_stream()); + } else if (compressed_v_list) { + device_bcast(major_comm, + (*compressed_v_list).data(), + std::get<1>(v_buffer).data(), + std::get<1>(v_buffer).size(), + static_cast(partition_idx), + handle.get_stream()); + } else { + device_bcast(major_comm, + (static_cast(partition_idx) == major_comm_rank) + ? 
sorted_unique_vertex_first + : static_cast(nullptr), + std::get<0>(v_buffer).data(), + std::get<0>(v_buffer).size(), + static_cast(partition_idx), + handle.get_stream()); + } + } + device_group_end(major_comm); + bool kernel_fusion = + !edge_partition_keys && !v_list_bitmap && (loop_count > 1) && + (static_cast(std::reduce(local_v_list_sizes.begin() + i, + local_v_list_sizes.begin() + (i + loop_count))) < + size_t{256 * 1024} /* tuning parameter (binary search vs kernel launch overhead) */ * + loop_count); // FIXME: kernel fusion can be useful even when + // edge_partition_keys.has_value() is true + + if (!kernel_fusion) { + if (stream_pool_indices) { handle.sync_stream(); } + } + + if (!kernel_fusion) { + size_t stream_pool_size{0}; + if (stream_pool_indices) { stream_pool_size = (*stream_pool_indices).size(); } + for (size_t j = 0; j < loop_count; ++j) { + auto partition_idx = i + j; + auto loop_stream = + stream_pool_indices + ? handle.get_stream_from_stream_pool((*stream_pool_indices)[j % stream_pool_size]) + : handle.get_stream(); + + if (v_list_bitmap) { + auto const& rx_bitmap = std::get<1>(edge_partition_v_buffers[j]); + rmm::device_uvector rx_vertices(local_v_list_sizes[partition_idx], + loop_stream); + retrieve_vertex_list_from_bitmap( + raft::device_span(rx_bitmap.data(), rx_bitmap.size()), + rx_vertices.begin(), + raft::device_span(dummy_counters.data() + j, size_t{1}), + local_v_list_range_firsts[partition_idx], + local_v_list_range_lasts[partition_idx], + loop_stream); + edge_partition_v_buffers[j] = std::move(rx_vertices); + } + + if (edge_partition_keys) { + thrust::for_each( + rmm::exec_policy_nosync(loop_stream), + thrust::make_counting_iterator(vertex_t{0}), + thrust::make_counting_iterator(local_v_list_sizes[partition_idx]), + [rx_vertex_first = compressed_v_list + ? static_cast(nullptr) + : std::get<0>(edge_partition_v_buffers[j]).data(), + rx_compressed_vertex_first = compressed_v_list + ?
std::get<1>(edge_partition_v_buffers[j]).data() + : static_cast(nullptr), + range_first = local_v_list_range_firsts[partition_idx], + input, + subrange_key_first = + (*edge_partition_keys).begin() + (*key_offsets)[partition_idx], + subrange_key_last = + (*edge_partition_keys).begin() + (*key_offsets)[partition_idx + 1], + edge_partition_value_first = edge_partition_value_first, + subrange_start_offset = (*key_offsets)[partition_idx]] __device__(auto i) { + vertex_t minor{}; + if (rx_vertex_first != nullptr) { + minor = *(rx_vertex_first + i); + } else { + minor = range_first + *(rx_compressed_vertex_first + i); + } + auto it = + thrust::lower_bound(thrust::seq, subrange_key_first, subrange_key_last, minor); + if ((it != subrange_key_last) && (*it == minor)) { + auto subrange_offset = thrust::distance(subrange_key_first, it); + if constexpr (contains_packed_bool_element) { + fill_scalar_or_thrust_tuple( + edge_partition_value_first, subrange_start_offset + subrange_offset, input); + } else { + *(edge_partition_value_first + subrange_start_offset + subrange_offset) = + input; + } + } + }); + } else { + if constexpr (contains_packed_bool_element) { + thrust::for_each( + rmm::exec_policy_nosync(loop_stream), + thrust::make_counting_iterator(vertex_t{0}), + thrust::make_counting_iterator(local_v_list_sizes[partition_idx]), + [minor_range_first, + rx_vertex_first = compressed_v_list + ? static_cast(nullptr) + : std::get<0>(edge_partition_v_buffers[j]).data(), + rx_compressed_vertex_first = compressed_v_list + ? 
std::get<1>(edge_partition_v_buffers[j]).data() + : static_cast(nullptr), + range_first = local_v_list_range_firsts[partition_idx], + input, + output_value_first = edge_partition_value_first] __device__(auto i) { + vertex_t minor{}; + if (rx_vertex_first != nullptr) { + minor = *(rx_vertex_first + i); + } else { + minor = range_first + *(rx_compressed_vertex_first + i); + } + auto minor_offset = minor - minor_range_first; + fill_scalar_or_thrust_tuple(output_value_first, minor_offset, input); + }); + } else { + if (compressed_v_list) { + auto map_first = thrust::make_transform_iterator( + std::get<1>(edge_partition_v_buffers[j]).begin(), + cuda::proclaim_return_type( + [minor_range_first, + range_first = + local_v_list_range_firsts[partition_idx]] __device__(auto v_offset) { + return static_cast(v_offset + (range_first - minor_range_first)); + })); + auto val_first = thrust::make_constant_iterator(input); + thrust::scatter(rmm::exec_policy_nosync(loop_stream), + val_first, + val_first + local_v_list_sizes[partition_idx], + map_first, + edge_partition_value_first); + } else { + auto map_first = thrust::make_transform_iterator( + std::get<0>(edge_partition_v_buffers[j]).begin(), + cuda::proclaim_return_type( + [minor_range_first] __device__(auto v) { return v - minor_range_first; })); + auto val_first = thrust::make_constant_iterator(input); + thrust::scatter(rmm::exec_policy_nosync(loop_stream), + val_first, + val_first + local_v_list_sizes[partition_idx], + map_first, + edge_partition_value_first); + } + } + } + } + if (stream_pool_indices) { handle.sync_stream_pool(*stream_pool_indices); } + } else { // kernel fusion + std::vector h_vertex_vars(loop_count /* range_first values */ + + (loop_count + 1) /* loop offsets */); + std::copy(local_v_list_range_firsts.begin() + i, + local_v_list_range_firsts.begin() + (i + loop_count), + h_vertex_vars.begin()); + h_vertex_vars[loop_count] = 0; + std::inclusive_scan(local_v_list_sizes.begin() + i, + 
local_v_list_sizes.begin() + (i + loop_count), + h_vertex_vars.begin() + (loop_count + 1)); + std::vector h_ptrs(loop_count); + if (compressed_v_list) { + for (size_t j = 0; j < loop_count; ++j) { + h_ptrs[j] = static_cast(std::get<1>(edge_partition_v_buffers[j]).data()); + } + } else { + for (size_t j = 0; j < loop_count; ++j) { + h_ptrs[j] = static_cast(std::get<0>(edge_partition_v_buffers[j]).data()); + } + } + rmm::device_uvector d_vertex_vars(h_vertex_vars.size(), handle.get_stream()); + rmm::device_uvector d_ptrs(h_ptrs.size(), handle.get_stream()); + raft::update_device( + d_vertex_vars.data(), h_vertex_vars.data(), h_vertex_vars.size(), handle.get_stream()); + raft::update_device(d_ptrs.data(), h_ptrs.data(), h_ptrs.size(), handle.get_stream()); + + raft::device_span range_firsts(d_vertex_vars.data(), loop_count); + raft::device_span loop_offsets(d_vertex_vars.data() + loop_count, + loop_count + 1); + if constexpr (contains_packed_bool_element) { + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(vertex_t{0}), + thrust::make_counting_iterator(h_vertex_vars.back()), + [range_firsts, + loop_offsets, + minor_range_first, + input, + rx_firsts = raft::device_span(d_ptrs.data(), d_ptrs.size()), + output_value_first = edge_partition_value_first, + compressed = compressed_v_list.has_value()] __device__(auto i) { + auto loop_idx = + thrust::distance(loop_offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, loop_offsets.begin() + 1, loop_offsets.end(), i)); + auto rx_first = rx_firsts[loop_idx]; + vertex_t minor{}; + if (compressed) { + minor = range_firsts[loop_idx] + + *(static_cast(rx_first) + (i - loop_offsets[loop_idx])); + } else { + minor = *(static_cast(rx_first) + (i - loop_offsets[loop_idx])); + } + auto minor_offset = minor - minor_range_first; + fill_scalar_or_thrust_tuple(output_value_first, minor_offset, input); + }); + } else { + auto val_first = thrust::make_constant_iterator(input); + if (compressed_v_list) { + 
auto map_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(vertex_t{0}), + cuda::proclaim_return_type( + [range_firsts, + loop_offsets, + rx_firsts = raft::device_span(d_ptrs.data(), d_ptrs.size()), + minor_range_first] __device__(auto i) { + auto loop_idx = thrust::distance( + loop_offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, loop_offsets.begin() + 1, loop_offsets.end(), i)); + auto minor = + range_firsts[loop_idx] + *(static_cast(rx_firsts[loop_idx]) + + (i - loop_offsets[loop_idx])); + return minor - minor_range_first; + })); + thrust::scatter(handle.get_thrust_policy(), + val_first, + val_first + h_vertex_vars.back(), + map_first, + edge_partition_value_first); + } else { + auto map_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(vertex_t{0}), + cuda::proclaim_return_type( + [loop_offsets, + rx_firsts = raft::device_span(d_ptrs.data(), d_ptrs.size()), + minor_range_first] __device__(auto i) { + auto loop_idx = thrust::distance( + loop_offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, loop_offsets.begin() + 1, loop_offsets.end(), i)); + auto minor = *(static_cast(rx_firsts[loop_idx]) + + (i - loop_offsets[loop_idx])); + return minor - minor_range_first; + })); + thrust::scatter(handle.get_thrust_policy(), + val_first, + val_first + h_vertex_vars.back(), + map_first, + edge_partition_value_first); + } + } } } } } else { assert(graph_view.local_vertex_partition_range_size() == - graph_view.local_edge_partition_src_range_size()); + (GraphViewType::is_storage_transposed + ? 
graph_view.local_edge_partition_src_range_size() + : graph_view.local_edge_partition_dst_range_sizse())); if constexpr (contains_packed_bool_element) { thrust::for_each(handle.get_thrust_policy(), - vertex_first, - vertex_last, + sorted_unique_vertex_first, + sorted_unique_vertex_last, [input, output_value_first = edge_partition_value_first] __device__(auto v) { fill_scalar_or_thrust_tuple(output_value_first, v, input); }); } else { auto val_first = thrust::make_constant_iterator(input); - thrust::scatter(handle.get_thrust_policy(), - val_first, - val_first + thrust::distance(vertex_first, vertex_last), - vertex_first, - edge_partition_value_first); + thrust::scatter( + handle.get_thrust_policy(), + val_first, + val_first + thrust::distance(sorted_unique_vertex_first, sorted_unique_vertex_last), + sorted_unique_vertex_first, + edge_partition_value_first); } } } @@ -451,8 +1034,8 @@ void fill_edge_src_property(raft::handle_t const& handle, /** * @brief Fill graph edge source property values to the input value. * - * This version fills only a subset of graph edge source property values. [@p vertex_first, - * @p vertex_last) specifies the vertices to be filled. + * This version fills only a subset of graph edge source property values. [@p + * sorted_unique_vertex_first, @p sorted_unique_vertex_last) specifies the vertices to be filled. * * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam VertexIterator Type of the iterator for vertex identifiers. @@ -461,10 +1044,12 @@ void fill_edge_src_property(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. - * @param vertex_first Iterator pointing to the first (inclusive) vertex with a value to be filled. 
- * v in [vertex_first, vertex_last) should be distinct (and should belong to the vertex partition - * assigned to this process in multi-GPU), otherwise undefined behavior. - * @param vertex_last Iterator pointing to the last (exclusive) vertex with a value to be filled. + * @param sorted_unique_vertex_first Iterator pointing to the first (inclusive) vertex with a value + * to be filled. v in [sorted_unique_vertex_first, sorted_unique_vertex_last) should be sorted & + * distinct (and should belong to the vertex partition assigned to this process in multi-GPU), + * otherwise undefined behavior. + * @param sorted_unique_vertex_last Iterator pointing to the last (exclusive) vertex with a value to + * be filled. * @param edge_src_property_output edge_src_property_view_t class object to store source property * values (for the edge source assigned to this process in multi-GPU). * @param input Edge source property values will be set to @p input. @@ -476,8 +1061,8 @@ template void fill_edge_src_property(raft::handle_t const& handle, GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, + VertexIterator sorted_unique_vertex_first, + VertexIterator sorted_unique_vertex_last, EdgeSrcValueOutputWrapper edge_src_property_output, T input, bool do_expensive_check = false) @@ -486,8 +1071,8 @@ void fill_edge_src_property(raft::handle_t const& handle, if (do_expensive_check) { auto num_invalids = thrust::count_if( handle.get_thrust_policy(), - vertex_first, - vertex_last, + sorted_unique_vertex_first, + sorted_unique_vertex_last, [local_vertex_partition_range_first = graph_view.local_vertex_partition_range_first(), local_vertex_partition_range_last = graph_view.local_vertex_partition_range_last()] __device__(auto v) { @@ -498,17 +1083,25 @@ void fill_edge_src_property(raft::handle_t const& handle, num_invalids = host_scalar_allreduce(comm, num_invalids, raft::comms::op_t::SUM, handle.get_stream()); } - CUGRAPH_EXPECTS( - num_invalids == 0, - "Invalid input
argument: invalid or non-local vertices in [vertex_first, vertex_last)."); + CUGRAPH_EXPECTS(num_invalids == 0, + "Invalid input argument: invalid or non-local vertices in " + "[sorted_unique_vertex_first, sorted_unique_vertex_last)."); } if constexpr (GraphViewType::is_storage_transposed) { - detail::fill_edge_minor_property( - handle, graph_view, vertex_first, vertex_last, edge_src_property_output, input); + detail::fill_edge_minor_property(handle, + graph_view, + sorted_unique_vertex_first, + sorted_unique_vertex_last, + edge_src_property_output, + input); } else { - detail::fill_edge_major_property( - handle, graph_view, vertex_first, vertex_last, edge_src_property_output, input); + detail::fill_edge_major_property(handle, + graph_view, + sorted_unique_vertex_first, + sorted_unique_vertex_last, + edge_src_property_output, + input); } } @@ -552,8 +1145,8 @@ void fill_edge_dst_property(raft::handle_t const& handle, /** * @brief Fill graph edge destination property values to the input value. * - * This version fills only a subset of graph edge destination property values. [@p vertex_first, - * @p vertex_last) specifies the vertices to be filled. + * This version fills only a subset of graph edge destination property values. [@p + * sorted_unique_vertex_first, @p sorted_unique_vertex_last) specifies the vertices to be filled. * * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam VertexIterator Type of the iterator for vertex identifiers. @@ -563,10 +1156,12 @@ void fill_edge_dst_property(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. - * @param vertex_first Iterator pointing to the first (inclusive) vertex with a value to be filled. 
- * v in [vertex_first, vertex_last) should be distinct (and should belong to the vertex partition - * assigned to this process in multi-GPU), otherwise undefined behavior. - * @param vertex_last Iterator pointing to the last (exclusive) vertex with a value to be filled. + * @param sorted_unique_vertex_first Iterator pointing to the first (inclusive) vertex with a value + * to be filled. v in [sorted_unique_vertex_first, sorted_unique_vertex_last) should be sorted & + * distinct (and should belong to the vertex partition assigned to this process in multi-GPU), + * otherwise undefined behavior. + * @param sorted_unique_vertex_last Iterator pointing to the last (exclusive) vertex with a value to + * be filled. * @param edge_dst_property_output edge_dst_property_view_t class object to store destination * property values (for the edge destinations assigned to this process in multi-GPU). * @param input Edge destination property values will be set to @p input. @@ -578,8 +1173,8 @@ template void fill_edge_dst_property(raft::handle_t const& handle, GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, + VertexIterator sorted_unique_vertex_first, + VertexIterator sorted_unique_vertex_last, EdgeDstValueOutputWrapper edge_dst_property_output, T input, bool do_expensive_check = false) @@ -588,8 +1183,8 @@ void fill_edge_dst_property(raft::handle_t const& handle, if (do_expensive_check) { auto num_invalids = thrust::count_if( handle.get_thrust_policy(), - vertex_first, - vertex_last, + sorted_unique_vertex_first, + sorted_unique_vertex_last, [local_vertex_partition_range_first = graph_view.local_vertex_partition_range_first(), local_vertex_partition_range_last = graph_view.local_vertex_partition_range_last()] __device__(auto v) { @@ -600,17 +1195,25 @@ void fill_edge_dst_property(raft::handle_t const& handle, num_invalids = host_scalar_allreduce(comm, num_invalids, raft::comms::op_t::SUM, handle.get_stream()); } - CUGRAPH_EXPECTS( - 
num_invalids == 0, - "Invalid input argument: invalid or non-local vertices in [vertex_first, vertex_last)."); + CUGRAPH_EXPECTS(num_invalids == 0, + "Invalid input argument: invalid or non-local vertices in " + "[sorted_unique_vertex_first, sorted_unique_vertex_last)."); } if constexpr (GraphViewType::is_storage_transposed) { - detail::fill_edge_major_property( - handle, graph_view, vertex_first, vertex_last, edge_dst_property_output, input); + detail::fill_edge_major_property(handle, + graph_view, + sorted_unique_vertex_first, + sorted_unique_vertex_last, + edge_dst_property_output, + input); } else { - detail::fill_edge_minor_property( - handle, graph_view, vertex_first, vertex_last, edge_dst_property_output, input); + detail::fill_edge_minor_property(handle, + graph_view, + sorted_unique_vertex_first, + sorted_unique_vertex_last, + edge_dst_property_output, + input); } } diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index ce5e5d3e8cf..f03e8f54fb2 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -250,11 +250,14 @@ void per_v_pair_transform_dst_nbr_intersection( } auto num_input_pairs = static_cast(thrust::distance(vertex_pair_first, vertex_pair_last)); - std::optional> unique_vertices{std::nullopt}; + std::optional> sorted_unique_vertices{std::nullopt}; std::optional(size_t{0}, rmm::cuda_stream_view{}))> - property_buffer_for_unique_vertices{std::nullopt}; + property_buffer_for_sorted_unique_vertices{std::nullopt}; if constexpr (GraphViewType::is_multi_gpu) { - unique_vertices = rmm::device_uvector(num_input_pairs * 2, handle.get_stream()); + auto& comm = handle.get_comms(); + + sorted_unique_vertices = + rmm::device_uvector(num_input_pairs * 2, handle.get_stream()); auto elem0_first = thrust::make_transform_iterator( vertex_pair_first, cugraph::thrust_tuple_get::value_type, @@ 
-262,7 +265,7 @@ void per_v_pair_transform_dst_nbr_intersection( thrust::copy(handle.get_thrust_policy(), elem0_first, elem0_first + num_input_pairs, - (*unique_vertices).begin()); + (*sorted_unique_vertices).begin()); auto elem1_first = thrust::make_transform_iterator( vertex_pair_first, cugraph::thrust_tuple_get::value_type, @@ -270,25 +273,25 @@ void per_v_pair_transform_dst_nbr_intersection( thrust::copy(handle.get_thrust_policy(), elem1_first, elem1_first + num_input_pairs, - (*unique_vertices).begin() + num_input_pairs); - thrust::sort(handle.get_thrust_policy(), (*unique_vertices).begin(), (*unique_vertices).end()); - (*unique_vertices) - .resize(thrust::distance((*unique_vertices).begin(), + (*sorted_unique_vertices).begin() + num_input_pairs); + thrust::sort(handle.get_thrust_policy(), + (*sorted_unique_vertices).begin(), + (*sorted_unique_vertices).end()); + (*sorted_unique_vertices) + .resize(thrust::distance((*sorted_unique_vertices).begin(), thrust::unique(handle.get_thrust_policy(), - (*unique_vertices).begin(), - (*unique_vertices).end())), + (*sorted_unique_vertices).begin(), + (*sorted_unique_vertices).end())), handle.get_stream()); - std::tie(unique_vertices, property_buffer_for_unique_vertices) = - collect_values_for_unique_int_vertices(handle, - std::move(*unique_vertices), - vertex_value_input_first, - graph_view.vertex_partition_range_lasts()); - thrust::sort_by_key( - handle.get_thrust_policy(), - (*unique_vertices).begin(), - (*unique_vertices).end(), - (*property_buffer_for_unique_vertices).begin()); // necessary for binary search + property_buffer_for_sorted_unique_vertices = collect_values_for_sorted_unique_int_vertices( + comm, + raft::device_span((*sorted_unique_vertices).data(), + (*sorted_unique_vertices).size()), + vertex_value_input_first, + graph_view.vertex_partition_range_lasts(), + graph_view.local_vertex_partition_range_first(), + handle.get_stream()); } rmm::device_uvector vertex_pair_indices(num_input_pairs, 
handle.get_stream()); @@ -412,32 +415,32 @@ void per_v_pair_transform_dst_nbr_intersection( do_expensive_check); } - if (unique_vertices) { - auto vertex_value_input_for_unique_vertices_first = - get_dataframe_buffer_begin(*property_buffer_for_unique_vertices); - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(this_chunk_size), - detail::call_intersection_op_t< - GraphViewType, - decltype(vertex_value_input_for_unique_vertices_first), - typename decltype(r_nbr_intersection_property_values0)::const_pointer, - IntersectionOp, - decltype(chunk_vertex_pair_index_first), - VertexPairIterator, - VertexPairValueOutputIterator>{edge_partition, - thrust::make_optional>( - (*unique_vertices).data(), (*unique_vertices).size()), - vertex_value_input_for_unique_vertices_first, - intersection_op, - intersection_offsets.data(), - intersection_indices.data(), - r_nbr_intersection_property_values0.data(), - r_nbr_intersection_property_values1.data(), - chunk_vertex_pair_index_first, - vertex_pair_first, - vertex_pair_value_output_first}); + if (sorted_unique_vertices) { + auto vertex_value_input_for_sorted_unique_vertices_first = + get_dataframe_buffer_begin(*property_buffer_for_sorted_unique_vertices); + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(this_chunk_size), + detail::call_intersection_op_t< + GraphViewType, + decltype(vertex_value_input_for_sorted_unique_vertices_first), + typename decltype(r_nbr_intersection_property_values0)::const_pointer, + IntersectionOp, + decltype(chunk_vertex_pair_index_first), + VertexPairIterator, + VertexPairValueOutputIterator>{ + edge_partition, + thrust::make_optional>( + (*sorted_unique_vertices).data(), (*sorted_unique_vertices).size()), + vertex_value_input_for_sorted_unique_vertices_first, + intersection_op, + intersection_offsets.data(), + intersection_indices.data(), + 
r_nbr_intersection_property_values0.data(), + r_nbr_intersection_property_values1.data(), + chunk_vertex_pair_index_first, + vertex_pair_first, + vertex_pair_value_output_first}); } else { thrust::for_each(handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index dd34d4b06ab..30706632ad2 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -206,7 +206,7 @@ struct return_value_compute_offset_t { template >, decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{}))> per_v_random_select_transform_e(raft::handle_t const& handle, GraphViewType const& graph_view, - VertexFrontierBucketType const& frontier, + KeyBucketType const& key_list, EdgeBiasSrcValueInputWrapper edge_bias_src_value_input, EdgeBiasDstValueInputWrapper edge_bias_dst_value_input, EdgeBiasValueInputWrapper edge_bias_value_input, @@ -237,7 +237,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, { using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; - using key_t = typename VertexFrontierBucketType::key_type; + using key_t = typename KeyBucketType::key_type; using key_buffer_t = dataframe_buffer_type_t; using edge_partition_src_input_device_view_t = std::conditional_t< @@ -286,15 +286,15 @@ per_v_random_select_transform_e(raft::handle_t const& handle, if (do_expensive_check) { // FIXME: better re-factor this check function? 
- auto frontier_vertex_first = - thrust_tuple_get_or_identity(frontier.begin()); - auto frontier_vertex_last = - thrust_tuple_get_or_identity(frontier.end()); + auto key_list_vertex_first = + thrust_tuple_get_or_identity(key_list.begin()); + auto key_list_vertex_last = + thrust_tuple_get_or_identity(key_list.end()); auto num_invalid_keys = - frontier.size() - + key_list.size() - thrust::count_if(handle.get_thrust_policy(), - frontier_vertex_first, - frontier_vertex_last, + key_list_vertex_first, + key_list_vertex_last, check_in_range_t{graph_view.local_vertex_partition_range_first(), graph_view.local_vertex_partition_range_last()}); if constexpr (GraphViewType::is_multi_gpu) { @@ -302,35 +302,35 @@ per_v_random_select_transform_e(raft::handle_t const& handle, handle.get_comms(), num_invalid_keys, raft::comms::op_t::SUM, handle.get_stream()); } CUGRAPH_EXPECTS(num_invalid_keys == size_t{0}, - "Invalid input argument: frontier includes out-of-range keys."); + "Invalid input argument: key_list includes out-of-range keys."); } - std::vector local_frontier_sizes{}; + std::vector local_key_list_sizes{}; if (minor_comm_size > 1) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - local_frontier_sizes = host_scalar_allgather(minor_comm, frontier.size(), handle.get_stream()); + local_key_list_sizes = host_scalar_allgather(minor_comm, key_list.size(), handle.get_stream()); } else { - local_frontier_sizes = std::vector{frontier.size()}; + local_key_list_sizes = std::vector{key_list.size()}; } - std::vector local_frontier_displacements(local_frontier_sizes.size()); - std::exclusive_scan(local_frontier_sizes.begin(), - local_frontier_sizes.end(), - local_frontier_displacements.begin(), + std::vector local_key_list_displacements(local_key_list_sizes.size()); + std::exclusive_scan(local_key_list_sizes.begin(), + local_key_list_sizes.end(), + local_key_list_displacements.begin(), size_t{0}); - // 1. aggregate frontier + // 1. 
aggregate key_list - std::optional aggregate_local_frontier{std::nullopt}; + std::optional aggregate_local_key_list{std::nullopt}; if (minor_comm_size > 1) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - aggregate_local_frontier = allocate_dataframe_buffer( - local_frontier_displacements.back() + local_frontier_sizes.back(), handle.get_stream()); + aggregate_local_key_list = allocate_dataframe_buffer( + local_key_list_displacements.back() + local_key_list_sizes.back(), handle.get_stream()); device_allgatherv(minor_comm, - frontier.begin(), - get_dataframe_buffer_begin(*aggregate_local_frontier), - local_frontier_sizes, - local_frontier_displacements, + key_list.begin(), + get_dataframe_buffer_begin(*aggregate_local_key_list), + local_key_list_sizes, + local_key_list_displacements, handle.get_stream()); } @@ -339,66 +339,66 @@ per_v_random_select_transform_e(raft::handle_t const& handle, rmm::device_uvector sample_local_nbr_indices(0, handle.get_stream()); std::optional> sample_key_indices{std::nullopt}; - std::vector local_frontier_sample_offsets{}; + std::vector local_key_list_sample_offsets{}; if constexpr (std::is_same_v>) { - std::tie(sample_local_nbr_indices, sample_key_indices, local_frontier_sample_offsets) = + std::tie(sample_local_nbr_indices, sample_key_indices, local_key_list_sample_offsets) = uniform_sample_and_compute_local_nbr_indices( handle, graph_view, - (minor_comm_size > 1) ? get_dataframe_buffer_cbegin(*aggregate_local_frontier) - : frontier.begin(), - local_frontier_displacements, - local_frontier_sizes, + (minor_comm_size > 1) ? 
get_dataframe_buffer_cbegin(*aggregate_local_key_list) + : key_list.begin(), + local_key_list_displacements, + local_key_list_sizes, rng_state, K, with_replacement); } else { - std::tie(sample_local_nbr_indices, sample_key_indices, local_frontier_sample_offsets) = + std::tie(sample_local_nbr_indices, sample_key_indices, local_key_list_sample_offsets) = biased_sample_and_compute_local_nbr_indices( handle, graph_view, - (minor_comm_size > 1) ? get_dataframe_buffer_cbegin(*aggregate_local_frontier) - : frontier.begin(), + (minor_comm_size > 1) ? get_dataframe_buffer_cbegin(*aggregate_local_key_list) + : key_list.begin(), edge_bias_src_value_input, edge_bias_dst_value_input, edge_bias_value_input, e_bias_op, - local_frontier_displacements, - local_frontier_sizes, + local_key_list_displacements, + local_key_list_sizes, rng_state, K, with_replacement, do_expensive_check); } - std::vector local_frontier_sample_counts(minor_comm_size); - std::adjacent_difference(local_frontier_sample_offsets.begin() + 1, - local_frontier_sample_offsets.end(), - local_frontier_sample_counts.begin()); + std::vector local_key_list_sample_counts(minor_comm_size); + std::adjacent_difference(local_key_list_sample_offsets.begin() + 1, + local_key_list_sample_offsets.end(), + local_key_list_sample_counts.begin()); // 3. transform auto sample_e_op_results = - allocate_dataframe_buffer(local_frontier_sample_offsets.back(), handle.get_stream()); + allocate_dataframe_buffer(local_key_list_sample_offsets.back(), handle.get_stream()); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); - auto edge_partition_frontier_key_first = - ((minor_comm_size > 1) ? get_dataframe_buffer_cbegin(*aggregate_local_frontier) - : frontier.begin()) + - local_frontier_displacements[i]; + auto edge_partition_key_list_first = + ((minor_comm_size > 1) ? 
get_dataframe_buffer_cbegin(*aggregate_local_key_list) + : key_list.begin()) + + local_key_list_displacements[i]; auto edge_partition_sample_local_nbr_index_first = - sample_local_nbr_indices.begin() + local_frontier_sample_offsets[i]; + sample_local_nbr_indices.begin() + local_key_list_sample_offsets[i]; auto edge_partition_sample_e_op_result_first = - get_dataframe_buffer_begin(sample_e_op_results) + local_frontier_sample_offsets[i]; + get_dataframe_buffer_begin(sample_e_op_results) + local_key_list_sample_offsets[i]; edge_partition_src_input_device_view_t edge_partition_src_value_input{}; edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; @@ -415,14 +415,14 @@ per_v_random_select_transform_e(raft::handle_t const& handle, if (sample_key_indices) { auto edge_partition_sample_key_index_first = - (*sample_key_indices).begin() + local_frontier_sample_offsets[i]; + (*sample_key_indices).begin() + local_key_list_sample_offsets[i]; thrust::transform( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(local_frontier_sample_counts[i]), + thrust::make_counting_iterator(local_key_list_sample_counts[i]), edge_partition_sample_e_op_result_first, transform_local_nbr_indices_t{ edge_partition, thrust::make_optional(edge_partition_sample_key_index_first), - edge_partition_frontier_key_first, + edge_partition_key_list_first, edge_partition_sample_local_nbr_index_first, edge_partition_src_value_input, edge_partition_dst_value_input, @@ -444,10 +444,10 @@ per_v_random_select_transform_e(raft::handle_t const& handle, thrust::transform( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(frontier.size() * K), + thrust::make_counting_iterator(key_list.size() * K), edge_partition_sample_e_op_result_first, transform_local_nbr_indices_t{edge_partition, thrust::nullopt, - edge_partition_frontier_key_first, + edge_partition_key_list_first, 
edge_partition_sample_local_nbr_index_first, edge_partition_src_value_input, edge_partition_dst_value_input, @@ -466,13 +466,13 @@ per_v_random_select_transform_e(raft::handle_t const& handle, K}); } } - aggregate_local_frontier = std::nullopt; + aggregate_local_key_list = std::nullopt; // 4. shuffle randomly selected & transformed results and update sample_offsets auto sample_offsets = invalid_value ? std::nullopt : std::make_optional>( - frontier.size() + 1, handle.get_stream()); + key_list.size() + 1, handle.get_stream()); assert(K <= std::numeric_limits::max()); if (minor_comm_size > 1) { sample_local_nbr_indices.resize(0, handle.get_stream()); @@ -483,12 +483,12 @@ per_v_random_select_transform_e(raft::handle_t const& handle, std::tie(sample_e_op_results, std::ignore) = shuffle_values(minor_comm, get_dataframe_buffer_begin(sample_e_op_results), - local_frontier_sample_counts, + local_key_list_sample_counts, handle.get_stream()); std::tie(sample_key_indices, std::ignore) = shuffle_values( - minor_comm, (*sample_key_indices).begin(), local_frontier_sample_counts, handle.get_stream()); + minor_comm, (*sample_key_indices).begin(), local_key_list_sample_counts, handle.get_stream()); - rmm::device_uvector sample_counts(frontier.size(), handle.get_stream()); + rmm::device_uvector sample_counts(key_list.size(), handle.get_stream()); thrust::fill( handle.get_thrust_policy(), sample_counts.begin(), sample_counts.end(), int32_t{0}); auto sample_intra_partition_displacements = @@ -504,7 +504,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, sample_counts.resize(0, handle.get_stream()); sample_counts.shrink_to_fit(handle.get_stream()); - resize_dataframe_buffer(tmp_sample_e_op_results, frontier.size() * K, handle.get_stream()); + resize_dataframe_buffer(tmp_sample_e_op_results, key_list.size() * K, handle.get_stream()); thrust::fill(handle.get_thrust_policy(), get_dataframe_buffer_begin(tmp_sample_e_op_results), 
get_dataframe_buffer_end(tmp_sample_e_op_results), @@ -553,7 +553,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, sample_e_op_results = std::move(tmp_sample_e_op_results); } else { if (!invalid_value) { - rmm::device_uvector sample_counts(frontier.size(), handle.get_stream()); + rmm::device_uvector sample_counts(key_list.size(), handle.get_stream()); thrust::tabulate( handle.get_thrust_policy(), sample_counts.begin(), @@ -597,8 +597,8 @@ per_v_random_select_transform_e(raft::handle_t const& handle, * @brief Randomly select and transform the input (tagged-)vertices' outgoing edges with biases. * * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam VertexFrontierBucketType Type of the vertex frontier bucket class which abstracts the - * current (tagged-)vertex frontier. + * @tparam KeyBucketType Type of the key bucket class which abstracts the current (tagged-)vertex + * list. * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. * @tparam EdgeDstValueInputWrapper Type of the wrapper for edge destination property values. * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. @@ -609,8 +609,8 @@ per_v_random_select_transform_e(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. - * @param frontier VertexFrontierBucketType class object to store the (tagged-)vertex list to sample - * outgoing edges. + * @param key_list KeyBucketType class object to store the (tagged-)vertex list to sample outgoing + * edges. * @param edge_src_value_input Wrapper used to access source input property values (for the edge * sources assigned to this process in multi-GPU). 
Use either cugraph::edge_src_property_t::view() * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() @@ -647,11 +647,11 @@ per_v_random_select_transform_e(raft::handle_t const& handle, * @return std::tuple Tuple of an optional offset vector of type * std::optional> and a dataframe buffer storing the output values of * type @p T from the selected edges. If @p invalid_value is std::nullopt, the offset vector is - * valid and has the size of @p frontier.size() + 1. If @p invalid_value.has_value() is true, - * std::nullopt is returned (the dataframe buffer will store @p frontier.size() * @p K elements). + * valid and has the size of @p key_list.size() + 1. If @p invalid_value.has_value() is true, + * std::nullopt is returned (the dataframe buffer will store @p key_list.size() * @p K elements). */ template >, decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{}))> per_v_random_select_transform_outgoing_e(raft::handle_t const& handle, GraphViewType const& graph_view, - VertexFrontierBucketType const& frontier, + KeyBucketType const& key_list, EdgeBiasSrcValueInputWrapper edge_bias_src_value_input, EdgeBiasDstValueInputWrapper edge_bias_dst_value_input, EdgeBiasValueInputWrapper edge_bias_value_input, @@ -682,7 +682,7 @@ per_v_random_select_transform_outgoing_e(raft::handle_t const& handle, { return detail::per_v_random_select_transform_e(handle, graph_view, - frontier, + key_list, edge_bias_src_value_input, edge_bias_dst_value_input, edge_bias_value_input, @@ -705,8 +705,8 @@ per_v_random_select_transform_outgoing_e(raft::handle_t const& handle, * (uniform neighbor sampling). * * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam VertexFrontierBucketType Type of the vertex frontier bucket class which abstracts the - * current (tagged-)vertex frontier. + * @tparam KeyBucketType Type of the key bucket class which abstracts the current (tagged-)vertex + * list. 
* @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. * @tparam EdgeDstValueInputWrapper Type of the wrapper for edge destination property values. * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. @@ -715,8 +715,8 @@ per_v_random_select_transform_outgoing_e(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. - * @param frontier VertexFrontierBucketType class object to store the (tagged-)vertex list to sample - * outgoing edges. + * @param key_list KeyBucketType class object to store the (tagged-)vertex list to sample outgoing + * edges. * @param edge_src_value_input Wrapper used to access source input property values (for the edge * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() @@ -747,11 +747,11 @@ per_v_random_select_transform_outgoing_e(raft::handle_t const& handle, * @return std::tuple Tuple of an optional offset vector of type * std::optional> and a dataframe buffer storing the output values of * type @p T from the selected edges. If @p invalid_value is std::nullopt, the offset vector is - * valid and has the size of @p frontier.size() + 1. If @p invalid_value.has_value() is true, - * std::nullopt is returned (the dataframe buffer will store @p frontier.size() * @p K elements). + * valid and has the size of @p key_list.size() + 1. If @p invalid_value.has_value() is true, + * std::nullopt is returned (the dataframe buffer will store @p key_list.size() * @p K elements). 
*/ template >, decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{}))> per_v_random_select_transform_outgoing_e(raft::handle_t const& handle, GraphViewType const& graph_view, - VertexFrontierBucketType const& frontier, + KeyBucketType const& key_list, EdgeSrcValueInputWrapper edge_src_value_input, EdgeDstValueInputWrapper edge_dst_value_input, EdgeValueInputWrapper edge_value_input, @@ -775,7 +775,7 @@ per_v_random_select_transform_outgoing_e(raft::handle_t const& handle, return detail::per_v_random_select_transform_e( handle, graph_view, - frontier, + key_list, edge_src_dummy_property_t{}.view(), edge_dst_dummy_property_t{}.view(), edge_dummy_property_t{}.view(), @@ -783,7 +783,7 @@ per_v_random_select_transform_outgoing_e(raft::handle_t const& handle, detail::edge_endpoint_dummy_property_view_t, detail::edge_endpoint_dummy_property_view_t, edge_dummy_property_view_t, - typename VertexFrontierBucketType::key_type>{}, + typename KeyBucketType::key_type>{}, edge_src_value_input, edge_dst_value_input, edge_value_input, diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index 5a5e9332094..c13816242bc 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -924,11 +924,12 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( auto values_for_unique_keys = allocate_dataframe_buffer(0, handle.get_stream()); std::tie(unique_minor_keys, values_for_unique_keys) = - collect_values_for_unique_keys(handle, + collect_values_for_unique_keys(comm, kv_store_view, std::move(unique_minor_keys), cugraph::detail::compute_gpu_id_from_ext_vertex_t{ - comm_size, major_comm_size, minor_comm_size}); + comm_size, major_comm_size, minor_comm_size}, + handle.get_stream()); if constexpr (KVStoreViewType::binary_search) { multi_gpu_minor_key_value_map_ptr = 
diff --git a/cpp/src/prims/per_v_transform_reduce_if_incoming_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_if_incoming_outgoing_e.cuh new file mode 100644 index 00000000000..1e0d366429e --- /dev/null +++ b/cpp/src/prims/per_v_transform_reduce_if_incoming_outgoing_e.cuh @@ -0,0 +1,421 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "prims/detail/per_v_transform_reduce_e.cuh" +#include "prims/vertex_frontier.cuh" + +#include +#include +#include + +#include + +#include +#include + +namespace cugraph { + +/** + * @brief Iterate over every vertex's incoming edges to update vertex properties. + * + * This function is inspired by thrust::transform_reduce. In addition, this function excludes the + * edges that return false when the predicate @p pred_op is applied. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. + * @tparam EdgeDstValueInputWrapper Type of the wrapper for edge destination property values. + * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. + * @tparam EdgeOp Type of the quinary edge operator. + * @tparam ReduceOp Type of the binary reduction operator. + * @tparam PredOp Type of the quinary predicate operator. + * @tparam T Type of the initial value for per-vertex reduction. 
+ * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param edge_src_value_input Wrapper used to access source input property values (for the edge + * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() + * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() + * (if @p e_op does not access source property values). Use update_edge_src_property to + * fill the wrapper. + * @param edge_dst_value_input Wrapper used to access destination input property values (for the + * edge destinations assigned to this process in multi-GPU). Use either + * cugraph::edge_dst_property_t::view() (if @p e_op needs to access destination property values) or + * cugraph::edge_dst_dummy_property_t::view() (if @p e_op does not access destination property + * values). Use update_edge_dst_property to fill the wrapper. + * @param edge_value_input Wrapper used to access edge input property values (for the edges assigned + * to this process in multi-GPU). Use either cugraph::edge_property_t::view() (if @p e_op needs to + * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not + * access edge property values). + * @param e_op Quinary operator takes edge source, edge destination, property values for the source, + * destination, and edge and returns a value to be reduced. + * @param init Initial value to be reduced with the reduced @p e_op return values for each vertex. + * If @p reduce_op is cugraph::reduce_op::any, init value is never selected except for the + * (tagged-)vertices with 0 incoming edges. + * @param reduce_op Binary operator that takes two input arguments and reduces the two values to one. 
+ * There are pre-defined reduction operators in src/prims/reduce_op.cuh. It is + * recommended to use the pre-defined reduction operators whenever possible as the current (and + * future) implementations of graph primitives may check whether @p ReduceOp is a known type (or has + * known member variables) to take a more optimized code path. See the documentation in the + * reduce_op.cuh file for instructions on writing custom reduction operators. + * @param pred_op Quinary operator takes edge source, edge destination, property values for the + * source, destination, and edge and returns whether this edge should be included (if true is + * returned) or excluded. + * @param vertex_value_output_first Iterator pointing to the vertex property variables for the first + * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_output_last` + * (exclusive) is deduced as @p vertex_value_output_first + @p + * graph_view.local_vertex_partition_range_size(). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + */ +template +void per_v_transform_reduce_if_incoming_e(raft::handle_t const& handle, + GraphViewType const& graph_view, + EdgeSrcValueInputWrapper edge_src_value_input, + EdgeDstValueInputWrapper edge_dst_value_input, + EdgeValueInputWrapper edge_value_input, + EdgeOp e_op, + T init, + ReduceOp reduce_op, + PredOp pred_op, + VertexValueOutputIterator vertex_value_output_first, + bool do_expensive_check = false) +{ + if (do_expensive_check) { + // currently, nothing to do + } + + constexpr bool incoming = true; + + detail::per_v_transform_reduce_e(handle, + graph_view, + static_cast(nullptr), + static_cast(nullptr), + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op, + init, + reduce_op, + pred_op, + vertex_value_output_first); +} + +/** + * @brief For each (tagged-)vertex in the input (tagged-)vertex list, iterate over the incoming + * edges to update (tagged-)vertex properties. 
+ * + * This function is inspired by thrust::transform_reduce(). In addition, this function excludes the + * edges that return false when the predicate @p pred_op is applied. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam KeyBucketType Type of the key bucket class which abstracts the current (tagged-)vertex + * list. + * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. + * @tparam EdgeDstValueInputWrapper Type of the wrapper for edge destination property values. + * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. + * @tparam EdgeOp Type of the quinary edge operator. + * @tparam ReduceOp Type of the binary reduction operator. + * @tparam PredOp Type of the quinary predicate operator. + * @tparam T Type of the initial value for per-vertex reduction. + * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param key_list KeyBucketType class object to store the (tagged-)vertex list to update + * (tagged-)vertex properties. + * @param edge_src_value_input Wrapper used to access source input property values (for the edge + * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() + * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() + * (if @p e_op does not access source property values). Use update_edge_src_property to fill the + * wrapper. + * @param edge_dst_value_input Wrapper used to access destination input property values (for the + * edge destinations assigned to this process in multi-GPU). 
Use either + * cugraph::edge_dst_property_t::view() (if @p e_op needs to access destination property values) or + * cugraph::edge_dst_dummy_property_t::view() (if @p e_op does not access destination property + * values). Use update_edge_dst_property to fill the wrapper. + * @param edge_value_input Wrapper used to access edge input property values (for the edges assigned + * to this process in multi-GPU). Use either cugraph::edge_property_t::view() (if @p e_op needs to + * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not + * access edge property values). + * @param e_op Quinary operator takes edge source, edge destination, property values for the source, + * destination, and edge and returns a value to be reduced. + * @param init Initial value to be reduced with the reduced @p e_op return values for each vertex. + * If @p reduce_op is cugraph::reduce_op::any, init value is never selected except for the + * (tagged-)vertices with 0 incoming edges. + * @param reduce_op Binary operator that takes two input arguments and reduce the two values to one. + * There are pre-defined reduction operators in src/prims/reduce_op.cuh. It is + * recommended to use the pre-defined reduction operators whenever possible as the current (and + * future) implementations of graph primitives may check whether @p ReduceOp is a known type (or has + * known member variables) to take a more optimized code path. See the documentation in the + * reduce_op.cuh file for instructions on writing custom reduction operators. + * @param pred_op Quinary operator takes edge source, edge destination, property values for the + * source, destination, and edge and returns whether this edge should be included (if true is + * returned) or excluded. + * @param vertex_value_output_first Iterator pointing to the (tagged-)vertex property variables for + * the first (inclusive) (tagged-)vertex in @p key_list. 
`vertex_value_output_last` (exclusive) is + * deduced as @p vertex_value_output_first + @p key_list.size(). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + */ +template +void per_v_transform_reduce_if_incoming_e(raft::handle_t const& handle, + GraphViewType const& graph_view, + KeyBucketType const& key_list, + EdgeSrcValueInputWrapper edge_src_value_input, + EdgeDstValueInputWrapper edge_dst_value_input, + EdgeValueInputWrapper edge_value_input, + EdgeOp e_op, + T init, + ReduceOp reduce_op, + PredOp pred_op, + VertexValueOutputIterator vertex_value_output_first, + bool do_expensive_check = false) +{ + static_assert(GraphViewType::is_storage_transposed); + + if (do_expensive_check) { + // currently, nothing to do + } + + constexpr bool incoming = true; + + detail::per_v_transform_reduce_e(handle, + graph_view, + key_list.begin(), + key_list.end(), + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op, + init, + reduce_op, + pred_op, + vertex_value_output_first); +} + +/** + * @brief Iterate over every vertex's outgoing edges to update vertex properties. + * + * This function is inspired by thrust::transform_reduce(). In addition, this function excludes the + * edges that return false when the predicate @p pred_op is applied. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. + * @tparam EdgeDstValueInputWrapper Type of the wrapper for edge destination property values. + * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. + * @tparam EdgeOp Type of the quinary edge operator. + * @tparam ReduceOp Type of the binary reduction operator. + * @tparam PredOp Type of the quinary predicate operator. + * @tparam T Type of the initial value for per-vertex reduction. + * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. 
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param edge_src_value_input Wrapper used to access source input property values (for the edge + * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() + * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() + * (if @p e_op does not access source property values). Use update_edge_src_property to fill the + * wrapper. + * @param edge_dst_value_input Wrapper used to access destination input property values (for the + * edge destinations assigned to this process in multi-GPU). Use either + * cugraph::edge_dst_property_t::view() (if @p e_op needs to access destination property values) or + * cugraph::edge_dst_dummy_property_t::view() (if @p e_op does not access destination property + * values). Use update_edge_dst_property to fill the wrapper. + * @param edge_value_input Wrapper used to access edge input property values (for the edges assigned + * to this process in multi-GPU). Use either cugraph::edge_property_t::view() (if @p e_op needs to + * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not + * access edge property values). + * @param e_op Quinary operator takes edge source, edge destination, property values for the source, + * destination, and edge and returns a value to be reduced. + * @param init Initial value to be added to the reduced @p e_op return values for each vertex. + * If @p reduce_op is cugraph::reduce_op::any, init value is never selected except for the + * (tagged-)vertices with 0 outgoing edges. + * @param reduce_op Binary operator that takes two input arguments and reduce the two values to one. + * There are pre-defined reduction operators in src/prims/reduce_op.cuh. 
It is + * recommended to use the pre-defined reduction operators whenever possible as the current (and + * future) implementations of graph primitives may check whether @p ReduceOp is a known type (or has + * known member variables) to take a more optimized code path. See the documentation in the + * reduce_op.cuh file for instructions on writing custom reduction operators. + * @param pred_op Quinary operator takes edge source, edge destination, property values for the + * source, destination, and edge and returns whether this edge should be included (if true is + * returned) or excluded. + * @param vertex_value_output_first Iterator pointing to the vertex property variables for the + * first (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_output_last` + * (exclusive) is deduced as @p vertex_value_output_first + @p + * graph_view.local_vertex_partition_range_size(). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + */ +template +void per_v_transform_reduce_if_outgoing_e(raft::handle_t const& handle, + GraphViewType const& graph_view, + EdgeSrcValueInputWrapper edge_src_value_input, + EdgeDstValueInputWrapper edge_dst_value_input, + EdgeValueInputWrapper edge_value_input, + EdgeOp e_op, + T init, + ReduceOp reduce_op, + PredOp pred_op, + VertexValueOutputIterator vertex_value_output_first, + bool do_expensive_check = false) +{ + if (do_expensive_check) { + // currently, nothing to do + } + + constexpr bool incoming = false; + + detail::per_v_transform_reduce_e(handle, + graph_view, + static_cast(nullptr), + static_cast(nullptr), + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op, + init, + reduce_op, + pred_op, + vertex_value_output_first); +} + +/** + * @brief For each (tagged-)vertex in the input (tagged-)vertex list, iterate over the outgoing + * edges to update (tagged-)vertex properties. + * + * This function is inspired by thrust::transform_reduce(). 
In addition, this function excludes the + * edges that return false when the predicate @p pred_op is applied. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam KeyBucketType Type of the key bucket class which abstracts the current (tagged-)vertex + * list. + * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. + * @tparam EdgeDstValueInputWrapper Type of the wrapper for edge destination property values. + * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. + * @tparam EdgeOp Type of the quinary edge operator. + * @tparam ReduceOp Type of the binary reduction operator. + * @tparam PredOp Type of the quinary predicate operator. + * @tparam T Type of the initial value for per-vertex reduction. + * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param key_list KeyBucketType class object to store the (tagged-)vertex list to update + * (tagged-)vertex properties. + * @param edge_src_value_input Wrapper used to access source input property values (for the edge + * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() + * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() + * (if @p e_op does not access source property values). Use update_edge_src_property to fill the + * wrapper. + * @param edge_dst_value_input Wrapper used to access destination input property values (for the + * edge destinations assigned to this process in multi-GPU). 
Use either + * cugraph::edge_dst_property_t::view() (if @p e_op needs to access destination property values) or + * cugraph::edge_dst_dummy_property_t::view() (if @p e_op does not access destination property + * values). Use update_edge_dst_property to fill the wrapper. + * @param edge_value_input Wrapper used to access edge input property values (for the edges assigned + * to this process in multi-GPU). Use either cugraph::edge_property_t::view() (if @p e_op needs to + * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not + * access edge property values). + * @param e_op Quinary operator takes edge source, edge destination, property values for the source, + * destination, and edge and returns a value to be reduced. + * @param init Initial value to be reduced with the reduced @p e_op return values for each vertex. + * If @p reduce_op is cugraph::reduce_op::any, init value is never selected except for the + * (tagged-)vertices with 0 outgoing edges. + * @param reduce_op Binary operator that takes two input arguments and reduce the two values to one. + * There are pre-defined reduction operators in src/prims/reduce_op.cuh. It is + * recommended to use the pre-defined reduction operators whenever possible as the current (and + * future) implementations of graph primitives may check whether @p ReduceOp is a known type (or has + * known member variables) to take a more optimized code path. See the documentation in the + * reduce_op.cuh file for instructions on writing custom reduction operators. + * @param pred_op Quinary operator takes edge source, edge destination, property values for the + * source, destination, and edge and returns whether this edge should be included (if true is + * returned) or excluded. + * @param vertex_value_output_first Iterator pointing to the (tagged-)vertex property variables for + * the first (inclusive) (tagged-)vertex in @p key_list. 
`vertex_value_output_last` (exclusive) is + * deduced as @p vertex_value_output_first + @p key_list.size(). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + */ +template +void per_v_transform_reduce_if_outgoing_e(raft::handle_t const& handle, + GraphViewType const& graph_view, + KeyBucketType const& key_list, + EdgeSrcValueInputWrapper edge_src_value_input, + EdgeDstValueInputWrapper edge_dst_value_input, + EdgeValueInputWrapper edge_value_input, + EdgeOp e_op, + T init, + ReduceOp reduce_op, + PredOp pred_op, + VertexValueOutputIterator vertex_value_output_first, + bool do_expensive_check = false) +{ + static_assert(!GraphViewType::is_storage_transposed); + static_assert(KeyBucketType::is_sorted_unique); + + if (do_expensive_check) { + // currently, nothing to do + } + + constexpr bool incoming = false; + + detail::per_v_transform_reduce_e(handle, + graph_view, + key_list.begin(), + key_list.end(), + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op, + init, + reduce_op, + pred_op, + vertex_value_output_first); +} + +} // namespace cugraph diff --git a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh index 027ef1f662d..5ba7edec894 100644 --- a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh @@ -15,558 +15,165 @@ */ #pragma once -#include "detail/graph_partition_utils.cuh" -#include "prims/detail/prim_functors.cuh" -#include "prims/fill_edge_src_dst_property.cuh" -#include "prims/property_op_utils.cuh" -#include "prims/reduce_op.cuh" +#include "prims/detail/per_v_transform_reduce_e.cuh" +#include "prims/vertex_frontier.cuh" -#include -#include -#include #include #include -#include -#include -#include #include -#include -#include #include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include 
-#include -#include -#include -#include -#include -#include -#include -#include #include -#include #include namespace cugraph { -namespace detail { - -int32_t constexpr per_v_transform_reduce_e_kernel_block_size = 512; - -template -struct transform_and_atomic_reduce_t { - edge_partition_device_view_t const& edge_partition{}; - result_t identity_element{}; - vertex_t const* indices{nullptr}; - TransformOp const& transform_op{}; - ResultValueOutputIteratorOrWrapper& result_value_output{}; - - __device__ void operator()(edge_t i) const - { - auto e_op_result = transform_op(i); - if (e_op_result != identity_element) { - auto minor = indices[i]; - auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); - if constexpr (multi_gpu) { - reduce_op::atomic_reduce(result_value_output, minor_offset, e_op_result); - } else { - reduce_op::atomic_reduce(result_value_output + minor_offset, e_op_result); - } - } - } -}; - -template -__device__ void update_result_value_output( - edge_partition_device_view_t const& edge_partition, - vertex_t const* indices, - edge_t local_degree, - TransformOp const& transform_op, - result_t init, - ReduceOp const& reduce_op, - size_t output_idx /* relevent only when update_major === true */, - result_t identity_element, - ResultValueOutputIteratorOrWrapper& result_value_output) -{ - if constexpr (update_major) { - *(result_value_output + output_idx) = - thrust::transform_reduce(thrust::seq, - thrust::make_counting_iterator(edge_t{0}), - thrust::make_counting_iterator(local_degree), - transform_op, - init, - reduce_op); - } else { - thrust::for_each( - thrust::seq, - thrust::make_counting_iterator(edge_t{0}), - thrust::make_counting_iterator(local_degree), - transform_and_atomic_reduce_t{ - edge_partition, identity_element, indices, transform_op, result_value_output}); - } -} - -template -__global__ static void per_v_transform_reduce_e_hypersparse( - edge_partition_device_view_t edge_partition, - EdgePartitionSrcValueInputWrapper 
edge_partition_src_value_input, - EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, - EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, - ResultValueOutputIteratorOrWrapper result_value_output, - EdgeOp e_op, - T init /* relevant only if update_major == true */, - T identity_element /* relevant only if update_major == true */, - ReduceOp reduce_op) -{ - static_assert(update_major || reduce_op::has_compatible_raft_comms_op_v< - ReduceOp>); // atomic_reduce is defined only when - // has_compatible_raft_comms_op_t is true - - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - auto major_start_offset = static_cast(*(edge_partition.major_hypersparse_first()) - - edge_partition.major_range_first()); - auto idx = static_cast(tid); - - auto dcs_nzd_vertex_count = *(edge_partition.dcs_nzd_vertex_count()); - - while (idx < static_cast(dcs_nzd_vertex_count)) { - auto major = - *(edge_partition.major_from_major_hypersparse_idx_nocheck(static_cast(idx))); - auto major_offset = edge_partition.major_offset_from_major_nocheck(major); - auto major_idx = - major_start_offset + idx; // major_offset != major_idx in the hypersparse region - vertex_t const* indices{nullptr}; - edge_t edge_offset{}; - edge_t local_degree{}; - thrust::tie(indices, edge_offset, local_degree) = - edge_partition.local_edges(static_cast(major_idx)); - - auto call_e_op = call_e_op_t{edge_partition, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - e_op, - major, - major_offset, - indices, - edge_offset}; - - if (edge_partition_e_mask) { - auto transform_op = - [&edge_partition_e_mask, &call_e_op, identity_element, edge_offset] __device__(auto i) { - if ((*edge_partition_e_mask).get(edge_offset + i)) { - return call_e_op(i); - } else { - return identity_element; - } - }; - 
- update_result_value_output(edge_partition, - indices, - local_degree, - transform_op, - init, - reduce_op, - major - *(edge_partition).major_hypersparse_first(), - identity_element, - result_value_output); - } else { - update_result_value_output(edge_partition, - indices, - local_degree, - call_e_op, - init, - reduce_op, - major - *(edge_partition).major_hypersparse_first(), - identity_element, - result_value_output); - } - idx += gridDim.x * blockDim.x; - } -} - -template -__global__ static void per_v_transform_reduce_e_low_degree( - edge_partition_device_view_t edge_partition, - typename GraphViewType::vertex_type major_range_first, - typename GraphViewType::vertex_type major_range_last, - EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, - EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, - EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, - ResultValueOutputIteratorOrWrapper result_value_output, - EdgeOp e_op, - T init /* relevant only if update_major == true */, - T identity_element /* relevant only if update_major == true */, - ReduceOp reduce_op) -{ - static_assert(update_major || reduce_op::has_compatible_raft_comms_op_v< - ReduceOp>); // atomic_reduce is defined only when - // has_compatible_raft_comms_op_t is true - - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - auto major_start_offset = - static_cast(major_range_first - edge_partition.major_range_first()); - auto idx = static_cast(tid); - - while (idx < static_cast(major_range_last - major_range_first)) { - auto major_offset = static_cast(major_start_offset + idx); - auto major = edge_partition.major_from_major_offset_nocheck(major_offset); - vertex_t const* indices{nullptr}; - edge_t edge_offset{}; - edge_t local_degree{}; - thrust::tie(indices, edge_offset, local_degree) = - 
edge_partition.local_edges(static_cast(major_offset)); - - auto call_e_op = call_e_op_t{edge_partition, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - e_op, - major, - major_offset, - indices, - edge_offset}; - - if (edge_partition_e_mask) { - auto transform_op = - [&edge_partition_e_mask, &call_e_op, identity_element, edge_offset] __device__(auto i) { - if ((*edge_partition_e_mask).get(edge_offset + i)) { - return call_e_op(i); - } else { - return identity_element; - } - }; - - update_result_value_output(edge_partition, - indices, - local_degree, - transform_op, - init, - reduce_op, - idx, - identity_element, - result_value_output); - } else { - update_result_value_output(edge_partition, - indices, - local_degree, - call_e_op, - init, - reduce_op, - idx, - identity_element, - result_value_output); - } - idx += gridDim.x * blockDim.x; - } -} - -template -__global__ static void per_v_transform_reduce_e_mid_degree( - edge_partition_device_view_t edge_partition, - typename GraphViewType::vertex_type major_range_first, - typename GraphViewType::vertex_type major_range_last, - EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, - EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, - EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, - ResultValueOutputIteratorOrWrapper result_value_output, - EdgeOp e_op, - T init /* relevant only if update_major == true */, - T identity_element /* relevant only if update_major == true */, - ReduceOp reduce_op) + typename T, + typename VertexValueOutputIterator> +void per_v_transform_reduce_incoming_e(raft::handle_t const& handle, + GraphViewType const& graph_view, + EdgeSrcValueInputWrapper edge_src_value_input, + EdgeDstValueInputWrapper edge_dst_value_input, + EdgeValueInputWrapper edge_value_input, + EdgeOp e_op, + T init, + ReduceOp reduce_op, + VertexValueOutputIterator 
vertex_value_output_first, + bool do_expensive_check = false) { - static_assert(update_major || reduce_op::has_compatible_raft_comms_op_v< - ReduceOp>); // atomic_reduce is defined only when - // has_compatible_raft_comms_op_t is true - - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using e_op_result_t = T; - - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - static_assert(per_v_transform_reduce_e_kernel_block_size % raft::warp_size() == 0); - auto const lane_id = tid % raft::warp_size(); - auto major_start_offset = - static_cast(major_range_first - edge_partition.major_range_first()); - auto idx = static_cast(tid / raft::warp_size()); - - using WarpReduce = cub::WarpReduce; - [[maybe_unused]] __shared__ typename WarpReduce::TempStorage - temp_storage[per_v_transform_reduce_e_kernel_block_size / - raft::warp_size()]; // relevant only if update_major == true - - while (idx < static_cast(major_range_last - major_range_first)) { - auto major_offset = static_cast(major_start_offset + idx); - auto major = edge_partition.major_from_major_offset_nocheck(major_offset); - vertex_t const* indices{nullptr}; - edge_t edge_offset{}; - edge_t local_degree{}; - thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_offset); - - auto call_e_op = call_e_op_t{edge_partition, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - e_op, - major, - major_offset, - indices, - edge_offset}; - - [[maybe_unused]] auto reduced_e_op_result = - lane_id == 0 ? 
init : identity_element; // relevant only if update_major == true - if (edge_partition_e_mask) { - for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { - if ((*edge_partition_e_mask).get(edge_offset + i)) { - auto e_op_result = call_e_op(i); - if constexpr (update_major) { - reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result); - } else { - auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(indices[i]); - if constexpr (GraphViewType::is_multi_gpu) { - reduce_op::atomic_reduce(result_value_output, minor_offset, e_op_result); - } else { - reduce_op::atomic_reduce(result_value_output + minor_offset, e_op_result); - } - } - } - } - } else { - for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { - auto e_op_result = call_e_op(i); - if constexpr (update_major) { - reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result); - } else { - auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(indices[i]); - if constexpr (GraphViewType::is_multi_gpu) { - reduce_op::atomic_reduce(result_value_output, minor_offset, e_op_result); - } else { - reduce_op::atomic_reduce(result_value_output + minor_offset, e_op_result); - } - } - } - } - - if constexpr (update_major) { - reduced_e_op_result = WarpReduce(temp_storage[threadIdx.x / raft::warp_size()]) - .Reduce(reduced_e_op_result, reduce_op); - if (lane_id == 0) { *(result_value_output + idx) = reduced_e_op_result; } - } - - idx += gridDim.x * (blockDim.x / raft::warp_size()); + if (do_expensive_check) { + // currently, nothing to do } -} - -template -__global__ static void per_v_transform_reduce_e_high_degree( - edge_partition_device_view_t edge_partition, - typename GraphViewType::vertex_type major_range_first, - typename GraphViewType::vertex_type major_range_last, - EdgePartitionSrcValueInputWrapper edge_partition_src_value_input, - EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, - EdgePartitionEdgeValueInputWrapper 
edge_partition_e_value_input, - thrust::optional edge_partition_e_mask, - ResultValueOutputIteratorOrWrapper result_value_output, - EdgeOp e_op, - T init /* relevant only if update_major == true */, - T identity_element /* relevant only if update_major == true */, - ReduceOp reduce_op) -{ - static_assert(update_major || reduce_op::has_compatible_raft_comms_op_v< - ReduceOp>); // atomic_reduce is defined only when - // has_compatible_raft_comms_op_t is true - - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using e_op_result_t = T; - auto major_start_offset = - static_cast(major_range_first - edge_partition.major_range_first()); - auto idx = static_cast(blockIdx.x); - - using BlockReduce = cub::BlockReduce; - [[maybe_unused]] __shared__ - typename BlockReduce::TempStorage temp_storage; // relevant only if update_major == true - - while (idx < static_cast(major_range_last - major_range_first)) { - auto major_offset = static_cast(major_start_offset + idx); - auto major = edge_partition.major_from_major_offset_nocheck(major_offset); - vertex_t const* indices{nullptr}; - edge_t edge_offset{}; - edge_t local_degree{}; - thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_offset); - - auto call_e_op = call_e_op_t{edge_partition, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - e_op, - major, - major_offset, - indices, - edge_offset}; - - [[maybe_unused]] auto reduced_e_op_result = - threadIdx.x == 0 ? 
init : identity_element; // relevant only if update_major == true - if (edge_partition_e_mask) { - for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { - if ((*edge_partition_e_mask).get(edge_offset + i)) { - auto e_op_result = call_e_op(i); - if constexpr (update_major) { - reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result); - } else { - auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(indices[i]); - if constexpr (GraphViewType::is_multi_gpu) { - reduce_op::atomic_reduce(result_value_output, minor_offset, e_op_result); - } else { - reduce_op::atomic_reduce(result_value_output + minor_offset, e_op_result); - } - } - } - } - } else { - for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { - auto e_op_result = call_e_op(i); - if constexpr (update_major) { - reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result); - } else { - auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(indices[i]); - if constexpr (GraphViewType::is_multi_gpu) { - reduce_op::atomic_reduce(result_value_output, minor_offset, e_op_result); - } else { - reduce_op::atomic_reduce(result_value_output + minor_offset, e_op_result); - } - } - } - } - - if constexpr (update_major) { - reduced_e_op_result = BlockReduce(temp_storage).Reduce(reduced_e_op_result, reduce_op); - if (threadIdx.x == 0) { *(result_value_output + idx) = reduced_e_op_result; } - } - - idx += gridDim.x; - } + constexpr bool incoming = true; + + detail::per_v_transform_reduce_e( + handle, + graph_view, + static_cast(nullptr), + static_cast(nullptr), + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op, + init, + reduce_op, + detail::const_true_e_op_t{}, + vertex_value_output_first); } -template -void per_v_transform_reduce_e(raft::handle_t const& handle, - GraphViewType const& graph_view, - EdgeSrcValueInputWrapper edge_src_value_input, - EdgeDstValueInputWrapper edge_dst_value_input, - EdgeValueInputWrapper edge_value_input, - 
EdgeOp e_op, - T init, - ReduceOp reduce_op, - VertexValueOutputIterator vertex_value_output_first) +void per_v_transform_reduce_incoming_e(raft::handle_t const& handle, + GraphViewType const& graph_view, + KeyBucketType const& key_list, + EdgeSrcValueInputWrapper edge_src_value_input, + EdgeDstValueInputWrapper edge_dst_value_input, + EdgeValueInputWrapper edge_value_input, + EdgeOp e_op, + T init, + ReduceOp reduce_op, + VertexValueOutputIterator vertex_value_output_first, + bool do_expensive_check = false) { - static_assert(ReduceOp::pure_function && reduce_op::has_compatible_raft_comms_op_v && - reduce_op::has_identity_element_v); // current restriction, to support - // general reduction, we may need to - // take a less efficient code path - - constexpr auto update_major = (incoming == GraphViewType::is_storage_transposed); - [[maybe_unused]] constexpr auto max_segments = - detail::num_sparse_segments_per_vertex_partition + size_t{1}; - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; + static_assert(GraphViewType::is_storage_transposed); - using edge_partition_src_input_device_view_t = std::conditional_t< - std::is_same_v, - detail::edge_partition_endpoint_dummy_property_device_view_t, - detail::edge_partition_endpoint_property_device_view_t< - vertex_t, - typename EdgeSrcValueInputWrapper::value_iterator, - typename EdgeSrcValueInputWrapper::value_type>>; - using edge_partition_dst_input_device_view_t = std::conditional_t< - std::is_same_v, - detail::edge_partition_endpoint_dummy_property_device_view_t, - detail::edge_partition_endpoint_property_device_view_t< - vertex_t, - typename EdgeDstValueInputWrapper::value_iterator, - typename EdgeDstValueInputWrapper::value_type>>; - using edge_partition_e_input_device_view_t = std::conditional_t< - std::is_same_v, - detail::edge_partition_edge_dummy_property_device_view_t, - detail::edge_partition_edge_property_device_view_t< - edge_t, - typename 
EdgeValueInputWrapper::value_iterator, - typename EdgeValueInputWrapper::value_type>>; - - static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); - - using minor_tmp_buffer_type = std::conditional_t, - edge_dst_property_t>; - [[maybe_unused]] std::unique_ptr minor_tmp_buffer{}; - if constexpr (GraphViewType::is_multi_gpu && !update_major) { - minor_tmp_buffer = std::make_unique(handle, graph_view); - } - - using edge_partition_minor_output_device_view_t = - std::conditional_tmutable_view().value_first())>, - void /* dummy */>; - - if constexpr (update_major) { - size_t partition_idx = 0; - if constexpr (GraphViewType::is_multi_gpu) { - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_rank = minor_comm.get_rank(); - partition_idx = static_cast(minor_comm_rank); - } - auto segment_offsets = graph_view.local_edge_partition_segment_offsets(partition_idx); - if (segment_offsets) { // no vertices in the zero degree segment are visited - thrust::fill(handle.get_thrust_policy(), - vertex_value_output_first + *((*segment_offsets).rbegin() + 1), - vertex_value_output_first + *((*segment_offsets).rbegin()), - init); - } - } else { - if constexpr (GraphViewType::is_multi_gpu) { - auto minor_init = init; - auto view = minor_tmp_buffer->view(); - if (view.keys()) { // defer applying the initial value to the end as minor_tmp_buffer may not - // store values for the entire minor range - minor_init = ReduceOp::identity_element; - } else { - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_rank = major_comm.get_rank(); - minor_init = (major_comm_rank == 0) ? 
init : ReduceOp::identity_element; - } - fill_edge_minor_property(handle, graph_view, minor_tmp_buffer->mutable_view(), minor_init); - } else { - thrust::fill(handle.get_thrust_policy(), - vertex_value_output_first, - vertex_value_output_first + graph_view.local_vertex_partition_range_size(), - init); - } - } - - std::optional> stream_pool_indices{std::nullopt}; - if constexpr (GraphViewType::is_multi_gpu) { - if ((graph_view.local_edge_partition_segment_offsets(0)) && - (handle.get_stream_pool_size() >= max_segments)) { - for (size_t i = 1; i < graph_view.number_of_local_edge_partitions(); ++i) { - assert(graph_view.local_edge_partition_segment_offsets(i)); - } - - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - // memory footprint vs parallelism trade-off - // peak memory requirement per loop is - // update_major ? V / comm_size * sizeof(T) : 0 - // and limit memory requirement to (E / comm_size) * sizeof(vertex_t) - - size_t num_streams = - std::min(static_cast(minor_comm_size) * max_segments, - raft::round_down_safe(handle.get_stream_pool_size(), max_segments)); - if constexpr (update_major) { - size_t value_size{0}; - if constexpr (is_thrust_tuple_of_arithmetic::value) { - auto elem_sizes = compute_thrust_tuple_element_sizes{}(); - value_size = std::reduce(elem_sizes.begin(), elem_sizes.end()); - } else { - value_size = sizeof(T); - } - - auto avg_vertex_degree = - graph_view.number_of_vertices() > 0 - ? 
(static_cast(graph_view.compute_number_of_edges(handle)) / - static_cast(graph_view.number_of_vertices())) - : double{0.0}; - - num_streams = - std::min(static_cast(avg_vertex_degree * (static_cast(sizeof(vertex_t)) / - static_cast(value_size))) * - max_segments, - num_streams); - } - - if (num_streams >= max_segments) { - stream_pool_indices = std::vector(num_streams); - std::iota((*stream_pool_indices).begin(), (*stream_pool_indices).end(), size_t{0}); - handle.sync_stream(); - } - } - } - - std::vector(0, rmm::cuda_stream_view{}))> - major_tmp_buffers{}; - if constexpr (GraphViewType::is_multi_gpu && update_major) { - std::vector major_tmp_buffer_sizes(graph_view.number_of_local_edge_partitions(), - size_t{0}); - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { - auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); - if (segment_offsets) { - major_tmp_buffer_sizes[i] = - *((*segment_offsets).rbegin() + 1); // exclude the zero degree segment - } else { - if constexpr (GraphViewType::is_storage_transposed) { - major_tmp_buffer_sizes[i] = graph_view.local_edge_partition_dst_range_size(i); - } else { - major_tmp_buffer_sizes[i] = graph_view.local_edge_partition_src_range_size(i); - } - } - } - if (stream_pool_indices) { - auto num_concurrent_loops = (*stream_pool_indices).size() / max_segments; - major_tmp_buffers.reserve(num_concurrent_loops); - for (size_t i = 0; i < num_concurrent_loops; ++i) { - size_t max_size{0}; - for (size_t j = i; j < graph_view.number_of_local_edge_partitions(); - j += num_concurrent_loops) { - max_size = std::max(major_tmp_buffer_sizes[j], max_size); - } - major_tmp_buffers.push_back(allocate_dataframe_buffer(max_size, handle.get_stream())); - } - } else { - major_tmp_buffers.reserve(1); - major_tmp_buffers.push_back(allocate_dataframe_buffer( - *std::max_element(major_tmp_buffer_sizes.begin(), major_tmp_buffer_sizes.end()), - handle.get_stream())); - } - } else { // dummy - 
major_tmp_buffers.reserve(1); - major_tmp_buffers.push_back(allocate_dataframe_buffer(size_t{0}, handle.get_stream())); - } - - if (stream_pool_indices) { handle.sync_stream(); } - - auto edge_mask_view = graph_view.edge_mask_view(); - - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { - auto edge_partition = - edge_partition_device_view_t( - graph_view.local_edge_partition_view(i)); - auto edge_partition_e_mask = - edge_mask_view - ? thrust::make_optional< - detail::edge_partition_edge_property_device_view_t>( - *edge_mask_view, i) - : thrust::nullopt; - - auto major_init = ReduceOp::identity_element; - if constexpr (update_major) { - if constexpr (GraphViewType::is_multi_gpu) { - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_rank = minor_comm.get_rank(); - major_init = (static_cast(i) == minor_comm_rank) ? init : ReduceOp::identity_element; - } else { - major_init = init; - } - } - - edge_partition_src_input_device_view_t edge_partition_src_value_input{}; - edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; - if constexpr (GraphViewType::is_storage_transposed) { - edge_partition_src_value_input = edge_partition_src_input_device_view_t(edge_src_value_input); - edge_partition_dst_value_input = - edge_partition_dst_input_device_view_t(edge_dst_value_input, i); - } else { - edge_partition_src_value_input = - edge_partition_src_input_device_view_t(edge_src_value_input, i); - edge_partition_dst_value_input = edge_partition_dst_input_device_view_t(edge_dst_value_input); - } - auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, i); - - auto major_buffer_first = - get_dataframe_buffer_begin(major_tmp_buffers[i % major_tmp_buffers.size()]); - - std::conditional_t, - VertexValueOutputIterator> - output_buffer{}; - if constexpr (GraphViewType::is_multi_gpu) { - if constexpr (update_major) { - output_buffer = 
major_buffer_first; - } else { - output_buffer = edge_partition_minor_output_device_view_t(minor_tmp_buffer->mutable_view()); - } - } else { - output_buffer = vertex_value_output_first; - } - - auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); - if (segment_offsets) { - static_assert(detail::num_sparse_segments_per_vertex_partition == 3); - - // FIXME: we may further improve performance by 1) individually tuning block sizes for - // different segments; and 2) adding one more segment for very high degree vertices and - // running segmented reduction - if (edge_partition.dcs_nzd_vertex_count()) { - auto exec_stream = - stream_pool_indices - ? handle.get_stream_from_stream_pool((i * max_segments) % (*stream_pool_indices).size()) - : handle.get_stream(); - - if constexpr (update_major) { // this is necessary as we don't visit every vertex in the - // hypersparse segment - thrust::fill(rmm::exec_policy(exec_stream), - output_buffer + (*segment_offsets)[3], - output_buffer + (*segment_offsets)[4], - major_init); - } - - if (*(edge_partition.dcs_nzd_vertex_count()) > 0) { - raft::grid_1d_thread_t update_grid(*(edge_partition.dcs_nzd_vertex_count()), - detail::per_v_transform_reduce_e_kernel_block_size, - handle.get_device_properties().maxGridSize[0]); - auto segment_output_buffer = output_buffer; - if constexpr (update_major) { segment_output_buffer += (*segment_offsets)[3]; } - detail::per_v_transform_reduce_e_hypersparse - <<>>( - edge_partition, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_e_mask, - segment_output_buffer, - e_op, - major_init, - ReduceOp::identity_element, - reduce_op); - } - } - if ((*segment_offsets)[3] - (*segment_offsets)[2] > 0) { - auto exec_stream = stream_pool_indices - ? 
handle.get_stream_from_stream_pool((i * max_segments + 1) % - (*stream_pool_indices).size()) - : handle.get_stream(); - raft::grid_1d_thread_t update_grid((*segment_offsets)[3] - (*segment_offsets)[2], - detail::per_v_transform_reduce_e_kernel_block_size, - handle.get_device_properties().maxGridSize[0]); - auto segment_output_buffer = output_buffer; - if constexpr (update_major) { segment_output_buffer += (*segment_offsets)[2]; } - detail::per_v_transform_reduce_e_low_degree - <<>>( - edge_partition, - edge_partition.major_range_first() + (*segment_offsets)[2], - edge_partition.major_range_first() + (*segment_offsets)[3], - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_e_mask, - segment_output_buffer, - e_op, - major_init, - ReduceOp::identity_element, - reduce_op); - } - if ((*segment_offsets)[2] - (*segment_offsets)[1] > 0) { - auto exec_stream = stream_pool_indices - ? handle.get_stream_from_stream_pool((i * max_segments + 2) % - (*stream_pool_indices).size()) - : handle.get_stream(); - raft::grid_1d_warp_t update_grid((*segment_offsets)[2] - (*segment_offsets)[1], - detail::per_v_transform_reduce_e_kernel_block_size, - handle.get_device_properties().maxGridSize[0]); - auto segment_output_buffer = output_buffer; - if constexpr (update_major) { segment_output_buffer += (*segment_offsets)[1]; } - detail::per_v_transform_reduce_e_mid_degree - <<>>( - edge_partition, - edge_partition.major_range_first() + (*segment_offsets)[1], - edge_partition.major_range_first() + (*segment_offsets)[2], - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_e_mask, - segment_output_buffer, - e_op, - major_init, - ReduceOp::identity_element, - reduce_op); - } - if ((*segment_offsets)[1] > 0) { - auto exec_stream = stream_pool_indices - ? 
handle.get_stream_from_stream_pool((i * max_segments + 3) % - (*stream_pool_indices).size()) - : handle.get_stream(); - raft::grid_1d_block_t update_grid((*segment_offsets)[1], - detail::per_v_transform_reduce_e_kernel_block_size, - handle.get_device_properties().maxGridSize[0]); - detail::per_v_transform_reduce_e_high_degree - <<>>( - edge_partition, - edge_partition.major_range_first(), - edge_partition.major_range_first() + (*segment_offsets)[1], - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_e_mask, - output_buffer, - e_op, - major_init, - ReduceOp::identity_element, - reduce_op); - } - } else { - if (edge_partition.major_range_size() > 0) { - raft::grid_1d_thread_t update_grid(edge_partition.major_range_size(), - detail::per_v_transform_reduce_e_kernel_block_size, - handle.get_device_properties().maxGridSize[0]); - detail::per_v_transform_reduce_e_low_degree - <<>>( - edge_partition, - edge_partition.major_range_first(), - edge_partition.major_range_last(), - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_e_mask, - output_buffer, - e_op, - major_init, - ReduceOp::identity_element, - reduce_op); - } - } - - if constexpr (GraphViewType::is_multi_gpu && update_major) { - auto& comm = handle.get_comms(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_rank = minor_comm.get_rank(); - auto const minor_comm_size = minor_comm.get_size(); - - if (segment_offsets && stream_pool_indices) { - if (edge_partition.dcs_nzd_vertex_count()) { - device_reduce( - minor_comm, - major_buffer_first + (*segment_offsets)[3], - vertex_value_output_first + (*segment_offsets)[3], - (*segment_offsets)[4] - (*segment_offsets)[3], - ReduceOp::compatible_raft_comms_op, - static_cast(i), - handle.get_stream_from_stream_pool((i * max_segments) % (*stream_pool_indices).size())); - } - if 
((*segment_offsets)[3] - (*segment_offsets)[2] > 0) { - device_reduce(minor_comm, - major_buffer_first + (*segment_offsets)[2], - vertex_value_output_first + (*segment_offsets)[2], - (*segment_offsets)[3] - (*segment_offsets)[2], - ReduceOp::compatible_raft_comms_op, - static_cast(i), - handle.get_stream_from_stream_pool((i * max_segments + 1) % - (*stream_pool_indices).size())); - } - if ((*segment_offsets)[2] - (*segment_offsets)[1] > 0) { - device_reduce(minor_comm, - major_buffer_first + (*segment_offsets)[1], - vertex_value_output_first + (*segment_offsets)[1], - (*segment_offsets)[2] - (*segment_offsets)[1], - ReduceOp::compatible_raft_comms_op, - static_cast(i), - handle.get_stream_from_stream_pool((i * max_segments + 2) % - (*stream_pool_indices).size())); - } - if ((*segment_offsets)[1] > 0) { - device_reduce(minor_comm, - major_buffer_first, - vertex_value_output_first, - (*segment_offsets)[1], - ReduceOp::compatible_raft_comms_op, - static_cast(i), - handle.get_stream_from_stream_pool((i * max_segments + 3) % - (*stream_pool_indices).size())); - } - } else { - size_t reduction_size = static_cast( - segment_offsets ? *((*segment_offsets).rbegin() + 1) /* exclude the zero degree segment */ - : edge_partition.major_range_size()); - device_reduce(minor_comm, - major_buffer_first, - vertex_value_output_first, - reduction_size, - ReduceOp::compatible_raft_comms_op, - static_cast(i), - handle.get_stream()); - } - } - - if (stream_pool_indices && ((i + 1) % major_tmp_buffers.size() == 0)) { - handle.sync_stream_pool( - *stream_pool_indices); // to prevent buffer over-write (this can happen as *segment_offsets - // do not necessarily coincide in different edge partitions). 
- } + if (do_expensive_check) { + // currently, nothing to do } - if (stream_pool_indices) { handle.sync_stream_pool(*stream_pool_indices); } - - if constexpr (GraphViewType::is_multi_gpu && !update_major) { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_rank = major_comm.get_rank(); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_rank = minor_comm.get_rank(); - auto const minor_comm_size = minor_comm.get_size(); - - auto view = minor_tmp_buffer->view(); - if (view.keys()) { // applying the initial value is deferred to here - vertex_t max_vertex_partition_size{0}; - for (int i = 0; i < major_comm_size; ++i) { - auto this_segment_vertex_partition_id = - compute_local_edge_partition_minor_range_vertex_partition_id_t{ - major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); - max_vertex_partition_size = - std::max(max_vertex_partition_size, - graph_view.vertex_partition_range_size(this_segment_vertex_partition_id)); - } - auto tx_buffer = allocate_dataframe_buffer(max_vertex_partition_size, handle.get_stream()); - auto tx_buffer_first = get_dataframe_buffer_begin(tx_buffer); - std::optional> minor_key_offsets{}; - if constexpr (GraphViewType::is_storage_transposed) { - minor_key_offsets = graph_view.local_sorted_unique_edge_src_vertex_partition_offsets(); - } else { - minor_key_offsets = graph_view.local_sorted_unique_edge_dst_vertex_partition_offsets(); - } - for (int i = 0; i < major_comm_size; ++i) { - auto minor_init = (major_comm_rank == i) ? 
init : ReduceOp::identity_element; - auto this_segment_vertex_partition_id = - compute_local_edge_partition_minor_range_vertex_partition_id_t{ - major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); - thrust::fill(handle.get_thrust_policy(), - tx_buffer_first, - tx_buffer_first + - graph_view.vertex_partition_range_size(this_segment_vertex_partition_id), - minor_init); - auto value_first = thrust::make_transform_iterator( - view.value_first(), - cuda::proclaim_return_type( - [reduce_op, minor_init] __device__(auto val) { return reduce_op(val, minor_init); })); - thrust::scatter(handle.get_thrust_policy(), - value_first + (*minor_key_offsets)[i], - value_first + (*minor_key_offsets)[i + 1], - thrust::make_transform_iterator( - (*(view.keys())).begin() + (*minor_key_offsets)[i], - cuda::proclaim_return_type( - [key_first = graph_view.vertex_partition_range_first( - this_segment_vertex_partition_id)] __device__(auto key) { - return key - key_first; - })), - tx_buffer_first); - device_reduce(major_comm, - tx_buffer_first, - vertex_value_output_first, - static_cast( - graph_view.vertex_partition_range_size(this_segment_vertex_partition_id)), - ReduceOp::compatible_raft_comms_op, - i, - handle.get_stream()); - } - } else { - auto first_segment_vertex_partition_id = - compute_local_edge_partition_minor_range_vertex_partition_id_t{ - major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(0); - vertex_t minor_range_first = - graph_view.vertex_partition_range_first(first_segment_vertex_partition_id); - for (int i = 0; i < major_comm_size; ++i) { - auto this_segment_vertex_partition_id = - compute_local_edge_partition_minor_range_vertex_partition_id_t{ - major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); - auto offset = graph_view.vertex_partition_range_first(this_segment_vertex_partition_id) - - minor_range_first; - device_reduce(major_comm, - view.value_first() + offset, - vertex_value_output_first, - static_cast( - 
graph_view.vertex_partition_range_size(this_segment_vertex_partition_id)), - ReduceOp::compatible_raft_comms_op, - i, - handle.get_stream()); - } - } - } + constexpr bool incoming = true; + + detail::per_v_transform_reduce_e( + handle, + graph_view, + key_list.begin(), + key_list.end(), + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op, + init, + reduce_op, + detail::const_true_e_op_t{}, + vertex_value_output_first); } -} // namespace detail - /** - * @brief Iterate over every vertex's incoming edges to update vertex properties. + * @brief Iterate over every vertex's outgoing edges to update vertex properties. * - * This function is inspired by thrust::transform_reduce. + * This function is inspired by thrust::transform_reduce(). * * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. @@ -1131,8 +240,8 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, * @param edge_src_value_input Wrapper used to access source input property values (for the edge * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() - * (if @p e_op does not access source property values). Use update_edge_src_property to - * fill the wrapper. + * (if @p e_op does not access source property values). Use update_edge_src_property to fill the + * wrapper. * @param edge_dst_value_input Wrapper used to access destination input property values (for the * edge destinations assigned to this process in multi-GPU). 
Use either * cugraph::edge_dst_property_t::view() (if @p e_op needs to access destination property values) or @@ -1145,14 +254,16 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, * @param e_op Quinary operator takes edge source, edge destination, property values for the source, * destination, and edge and returns a value to be reduced. * @param init Initial value to be added to the reduced @p e_op return values for each vertex. + * If @p reduce_op is cugraph::reduce_op::any, init value is never selected except for the + * (tagged-)vertices with 0 outgoing edges. * @param reduce_op Binary operator that takes two input arguments and reduce the two values to one. * There are pre-defined reduction operators in src/prims/reduce_op.cuh. It is * recommended to use the pre-defined reduction operators whenever possible as the current (and * future) implementations of graph primitives may check whether @p ReduceOp is a known type (or has * known member variables) to take a more optimized code path. See the documentation in the * reduce_op.cuh file for instructions on writing custom reduction operators. - * @param vertex_value_output_first Iterator pointing to the vertex property variables for the first - * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_output_last` + * @param vertex_value_output_first Iterator pointing to the vertex property variables for the + * first (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_output_last` * (exclusive) is deduced as @p vertex_value_output_first + @p * graph_view.local_vertex_partition_range_size(). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
@@ -1165,7 +276,7 @@ template -void per_v_transform_reduce_incoming_e(raft::handle_t const& handle, +void per_v_transform_reduce_outgoing_e(raft::handle_t const& handle, GraphViewType const& graph_view, EdgeSrcValueInputWrapper edge_src_value_input, EdgeDstValueInputWrapper edge_dst_value_input, @@ -1180,23 +291,37 @@ void per_v_transform_reduce_incoming_e(raft::handle_t const& handle, // currently, nothing to do } - detail::per_v_transform_reduce_e(handle, - graph_view, - edge_src_value_input, - edge_dst_value_input, - edge_value_input, - e_op, - init, - reduce_op, - vertex_value_output_first); + constexpr bool incoming = false; + + detail::per_v_transform_reduce_e( + handle, + graph_view, + static_cast(nullptr), + static_cast(nullptr), + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op, + init, + reduce_op, + detail::const_true_e_op_t{}, + vertex_value_output_first); } /** - * @brief Iterate over every vertex's outgoing edges to update vertex properties. + * @brief For each (tagged-)vertex in the input (tagged-)vertex list, iterate over the outgoing + * edges to update (tagged-)vertex properties. * * This function is inspired by thrust::transform_reduce(). * * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam KeyBucketType Type of the key bucket class which abstracts the current (tagged-)vertex + * list. * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. * @tparam EdgeDstValueInputWrapper Type of the wrapper for edge destination property values. * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. @@ -1207,6 +332,8 @@ void per_v_transform_reduce_incoming_e(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. 
+ * @param key_list KeyBucketType class object to store the (tagged-)vertex list to update + * (tagged-)vertex properties. * @param edge_src_value_input Wrapper used to access source input property values (for the edge * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() @@ -1223,20 +350,22 @@ void per_v_transform_reduce_incoming_e(raft::handle_t const& handle, * access edge property values). * @param e_op Quinary operator takes edge source, edge destination, property values for the source, * destination, and edge and returns a value to be reduced. - * @param init Initial value to be added to the reduced @p e_op return values for each vertex. + * @param init Initial value to be reduced with the reduced @p e_op return values for each vertex. + * If @p reduce_op is cugraph::reduce_op::any, init value is never selected except for the + * (tagged-)vertices with 0 outgoing edges. * @param reduce_op Binary operator that takes two input arguments and reduce the two values to one. * There are pre-defined reduction operators in src/prims/reduce_op.cuh. It is * recommended to use the pre-defined reduction operators whenever possible as the current (and * future) implementations of graph primitives may check whether @p ReduceOp is a known type (or has * known member variables) to take a more optimized code path. See the documentation in the * reduce_op.cuh file for instructions on writing custom reduction operators. - * @param vertex_value_output_first Iterator pointing to the vertex property variables for the - * first (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_output_last` - * (exclusive) is deduced as @p vertex_value_output_first + @p - * graph_view.local_vertex_partition_range_size(). 
+ * @param vertex_value_output_first Iterator pointing to the (tagged-)vertex property variables for + * the first (inclusive) (tagged-)vertex in @p key_list. `vertex_value_output_last` (exclusive) is + * deduced as @p vertex_value_output_first + @p key_list.size(). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template void per_v_transform_reduce_outgoing_e(raft::handle_t const& handle, GraphViewType const& graph_view, + KeyBucketType const& key_list, EdgeSrcValueInputWrapper edge_src_value_input, EdgeDstValueInputWrapper edge_dst_value_input, EdgeValueInputWrapper edge_value_input, @@ -1255,19 +385,33 @@ void per_v_transform_reduce_outgoing_e(raft::handle_t const& handle, VertexValueOutputIterator vertex_value_output_first, bool do_expensive_check = false) { + static_assert(!GraphViewType::is_storage_transposed); + static_assert(KeyBucketType::is_sorted_unique); + if (do_expensive_check) { // currently, nothing to do } - detail::per_v_transform_reduce_e(handle, - graph_view, - edge_src_value_input, - edge_dst_value_input, - edge_value_input, - e_op, - init, - reduce_op, - vertex_value_output_first); + constexpr bool incoming = false; + + detail::per_v_transform_reduce_e( + handle, + graph_view, + key_list.begin(), + key_list.end(), + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op, + init, + reduce_op, + detail::const_true_e_op_t{}, + vertex_value_output_first); } } // namespace cugraph diff --git a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh new file mode 100644 index 00000000000..87f590f571f --- /dev/null +++ b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh @@ -0,0 +1,1196 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "detail/graph_partition_utils.cuh" +#include "prims/detail/extract_transform_v_frontier_e.cuh" +#include "prims/detail/prim_utils.cuh" +#include "prims/property_op_utils.cuh" +#include "prims/reduce_op.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace cugraph { + +namespace detail { + +int32_t constexpr update_v_frontier_from_outgoing_e_kernel_block_size = 512; + +template +struct transform_reduce_v_frontier_call_e_op_t { + EdgeOp e_op{}; + + __device__ thrust::optional< + std::conditional_t && !std::is_same_v, + thrust::tuple, + std::conditional_t, key_t, payload_t>>> + operator()(key_t key, vertex_t dst, src_value_t sv, dst_value_t dv, e_value_t ev) const + { + auto e_op_result = e_op(key, dst, sv, dv, ev); + if (e_op_result.has_value()) { + auto reduce_by = dst; + if constexpr (std::is_same_v && std::is_same_v) { + return reduce_by; + } else if constexpr (std::is_same_v && !std::is_same_v) { + return thrust::make_tuple(reduce_by, *e_op_result); + } else if constexpr (!std::is_same_v && std::is_same_v) { + return thrust::make_tuple(reduce_by, *e_op_result); + } else { + return 
thrust::make_tuple(thrust::make_tuple(reduce_by, thrust::get<0>(*e_op_result)), + thrust::get<1>(*e_op_result)); + } + } else { + return thrust::nullopt; + } + } +}; + +template +struct update_keep_flag_t { + using input_key_t = + typename thrust::iterator_traits::value_type; // uint32_t (compressed) or + // key_t (i.e. vertex_t) + + raft::device_span bitmap{}; + raft::device_span keep_flags{}; + key_t v_range_first{}; + InputKeyIterator input_key_first{}; + thrust::optional invalid_input_key{}; + + __device__ void operator()(size_t i) const + { + auto v = *(input_key_first + i); + if (invalid_input_key && (v == *invalid_input_key)) { + return; // just discard + } + input_key_t v_offset{}; + if constexpr ((sizeof(key_t) == 8) && std::is_same_v) { + v_offset = v; + } else { + v_offset = v - v_range_first; + } + cuda::atomic_ref bitmap_word( + bitmap[packed_bool_offset(v_offset)]); + auto old = bitmap_word.fetch_or(packed_bool_mask(v_offset), cuda::std::memory_order_relaxed); + if ((old & packed_bool_mask(v_offset)) == packed_bool_empty_mask()) { + cuda::atomic_ref keep_flag_word( + keep_flags[packed_bool_offset(i)]); + keep_flag_word.fetch_or(packed_bool_mask(i), cuda::std::memory_order_relaxed); + } + } +}; + +template +std::tuple, optional_dataframe_buffer_type_t> +filter_buffer_elements( + raft::handle_t const& handle, + rmm::device_uvector&& + unique_v_buffer, // assumes that buffer elements are locally reduced first and unique + optional_dataframe_buffer_type_t&& payload_buffer, + raft::device_span vertex_partition_range_offsets, // size = major_comm_size + 1 + vertex_t allreduce_count_per_rank, + int subgroup_size) +{ + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_rank = major_comm.get_rank(); + auto const major_comm_size = major_comm.get_size(); + + rmm::device_uvector priorities(allreduce_count_per_rank * major_comm_size, + handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + 
priorities.begin(), + priorities.end(), + std::numeric_limits::max()); + thrust::for_each( + handle.get_thrust_policy(), + unique_v_buffer.begin(), + unique_v_buffer.end(), + [offsets = vertex_partition_range_offsets, + priorities = raft::device_span(priorities.data(), priorities.size()), + allreduce_count_per_rank, + subgroup_size, + major_comm_rank, + major_comm_size] __device__(auto v) { + auto root = + thrust::distance(offsets.begin() + 1, + thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), v)); + auto v_offset = v - offsets[root]; + if (v_offset < allreduce_count_per_rank) { + priorities[allreduce_count_per_rank * root + v_offset] = + rank_to_priority( + major_comm_rank, root, subgroup_size, major_comm_size, v_offset); + } + }); + device_allreduce(major_comm, + priorities.data(), + priorities.data(), + priorities.size(), + raft::comms::op_t::MIN, + handle.get_stream()); + if constexpr (std::is_same_v) { + unique_v_buffer.resize( + thrust::distance( + unique_v_buffer.begin(), + thrust::remove_if( + handle.get_thrust_policy(), + unique_v_buffer.begin(), + unique_v_buffer.end(), + unique_v_buffer.begin(), + [offsets = vertex_partition_range_offsets, + priorities = raft::device_span(priorities.data(), priorities.size()), + allreduce_count_per_rank, + subgroup_size, + major_comm_rank, + major_comm_size] __device__(auto v) { + auto root = thrust::distance( + offsets.begin() + 1, + thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), v)); + auto v_offset = v - offsets[root]; + if (v_offset < allreduce_count_per_rank) { + auto selected_rank = priority_to_rank( + priorities[allreduce_count_per_rank * root + v_offset], + root, + subgroup_size, + major_comm_size, + v_offset); + return major_comm_rank != selected_rank; + } else { + return false; + } + })), + handle.get_stream()); + } else { + auto kv_pair_first = thrust::make_zip_iterator(unique_v_buffer.begin(), + get_dataframe_buffer_begin(payload_buffer)); + unique_v_buffer.resize( + 
thrust::distance( + kv_pair_first, + thrust::remove_if( + handle.get_thrust_policy(), + kv_pair_first, + kv_pair_first + unique_v_buffer.size(), + unique_v_buffer.begin(), + [offsets = vertex_partition_range_offsets, + priorities = raft::device_span(priorities.data(), priorities.size()), + allreduce_count_per_rank, + subgroup_size, + major_comm_rank, + major_comm_size] __device__(auto v) { + auto root = thrust::distance( + offsets.begin() + 1, + thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), v)); + auto v_offset = v - offsets[root]; + if (v_offset < allreduce_count_per_rank) { + auto selected_rank = priority_to_rank( + priorities[allreduce_count_per_rank * root + v_offset], + root, + subgroup_size, + major_comm_size, + v_offset); + return major_comm_rank != selected_rank; + } else { + return false; + } + })), + handle.get_stream()); + resize_dataframe_buffer(payload_buffer, unique_v_buffer.size(), handle.get_stream()); + } + + return std::make_tuple(std::move(unique_v_buffer), std::move(payload_buffer)); +} + +template +std::tuple, optional_dataframe_buffer_type_t> +sort_and_reduce_buffer_elements( + raft::handle_t const& handle, + dataframe_buffer_type_t&& key_buffer, + optional_dataframe_buffer_type_t&& payload_buffer, + ReduceOp reduce_op, + std::conditional_t, std::tuple, std::byte /* dummy */> + vertex_range, + std::optional invalid_key /* drop (key, (payload)) pairs with invalid key */) +{ + constexpr bool compressed = + std::is_integral_v && (sizeof(key_t) == 8) && + std::is_same_v; // we currently compress only when key_t is an integral + // type (i.e. 
vertex_t) + static_assert(compressed || std::is_same_v); + + if constexpr (std::is_integral_v && + (std::is_same_v || + std::is_same_v>)) { // try to use + // bitmap for + // filtering + key_t range_size = std::get<1>(vertex_range) - std::get<0>(vertex_range); + if (static_cast(size_dataframe_buffer(key_buffer)) >= + static_cast(range_size) * + 0.125 /* tuning parameter */) { // use bitmap for filtering + rmm::device_uvector bitmap(packed_bool_size(range_size), handle.get_stream()); + rmm::device_uvector keep_flags(packed_bool_size(size_dataframe_buffer(key_buffer)), + handle.get_stream()); + thrust::fill( + handle.get_thrust_policy(), bitmap.begin(), bitmap.end(), packed_bool_empty_mask()); + thrust::fill( + handle.get_thrust_policy(), keep_flags.begin(), keep_flags.end(), packed_bool_empty_mask()); + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(size_dataframe_buffer(key_buffer)), + update_keep_flag_t{ + raft::device_span(bitmap.data(), bitmap.size()), + raft::device_span(keep_flags.data(), keep_flags.size()), + std::get<0>(vertex_range), + get_dataframe_buffer_begin(key_buffer), + to_thrust_optional(invalid_key)}); + auto stencil_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type( + [keep_flags = raft::device_span(keep_flags.data(), + keep_flags.size())] __device__(size_t i) { + return (keep_flags[packed_bool_offset(i)] & packed_bool_mask(i)) != + packed_bool_empty_mask(); + })); + if constexpr (std::is_same_v) { + resize_dataframe_buffer( + key_buffer, + thrust::distance(get_dataframe_buffer_begin(key_buffer), + thrust::remove_if(handle.get_thrust_policy(), + get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_end(key_buffer), + stencil_first, + is_not_equal_t{true})), + handle.get_stream()); + shrink_to_fit_dataframe_buffer(key_buffer, handle.get_stream()); + thrust::sort(handle.get_thrust_policy(), + 
get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_end(key_buffer)); + } else { + static_assert(std::is_same_v>); + auto pair_first = thrust::make_zip_iterator(get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_begin(payload_buffer)); + resize_dataframe_buffer( + key_buffer, + thrust::distance(pair_first, + thrust::remove_if(handle.get_thrust_policy(), + pair_first, + pair_first + size_dataframe_buffer(key_buffer), + stencil_first, + is_not_equal_t{true})), + handle.get_stream()); + resize_dataframe_buffer( + payload_buffer, size_dataframe_buffer(key_buffer), handle.get_stream()); + shrink_to_fit_dataframe_buffer(key_buffer, handle.get_stream()); + shrink_to_fit_dataframe_buffer(payload_buffer, handle.get_stream()); + thrust::sort_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_end(key_buffer), + get_dataframe_buffer_begin(payload_buffer)); + } + + if constexpr (compressed) { + rmm::device_uvector output_key_buffer(key_buffer.size(), handle.get_stream()); + thrust::transform(handle.get_thrust_policy(), + key_buffer.begin(), + key_buffer.end(), + output_key_buffer.begin(), + cuda::proclaim_return_type( + [v_first = std::get<0>(vertex_range)] __device__(uint32_t v_offset) { + return static_cast(v_first + v_offset); + })); + return std::make_tuple(std::move(output_key_buffer), std::move(payload_buffer)); + } else { + return std::make_tuple(std::move(key_buffer), std::move(payload_buffer)); + } + } + } + + if constexpr (std::is_same_v) { + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_end(key_buffer)); + } else { + thrust::sort_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_end(key_buffer), + get_optional_dataframe_buffer_begin(payload_buffer)); + } + + auto output_key_buffer = allocate_dataframe_buffer(0, handle.get_stream()); + if constexpr (std::is_same_v) { + if constexpr 
(compressed) { + resize_dataframe_buffer( + output_key_buffer, size_dataframe_buffer(key_buffer), handle.get_stream()); + auto input_key_first = thrust::make_transform_iterator( + get_dataframe_buffer_begin(key_buffer), + cuda::proclaim_return_type( + [v_first = std::get<0>(vertex_range)] __device__(auto v_offset) { + return static_cast(v_first + v_offset); + })); + resize_dataframe_buffer( + output_key_buffer, + thrust::distance( + get_dataframe_buffer_begin(output_key_buffer), + thrust::copy_if(handle.get_thrust_policy(), + input_key_first, + input_key_first + size_dataframe_buffer(key_buffer), + thrust::make_counting_iterator(size_t{0}), + get_dataframe_buffer_begin(output_key_buffer), + cuda::proclaim_return_type( + [key_first = get_dataframe_buffer_begin(key_buffer), + invalid_key = to_thrust_optional(invalid_key)] __device__(size_t i) { + auto key = *(key_first + i); + if (invalid_key && (key == *invalid_key)) { + return false; + } else if ((i != 0) && (key == *(key_first + (i - 1)))) { + return false; + } else { + return true; + } + }))), + handle.get_stream()); + } else { + resize_dataframe_buffer( + key_buffer, + thrust::distance( + get_dataframe_buffer_begin(key_buffer), + thrust::remove_if(handle.get_thrust_policy(), + get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_end(key_buffer), + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type( + [key_first = get_dataframe_buffer_begin(key_buffer), + invalid_key = to_thrust_optional(invalid_key)] __device__(size_t i) { + auto key = *(key_first + i); + if (invalid_key && (key == *invalid_key)) { + return true; + } else if ((i != 0) && (key == *(key_first + (i - 1)))) { + return true; + } else { + return false; + } + }))), + handle.get_stream()); + output_key_buffer = std::move(key_buffer); + } + shrink_to_fit_dataframe_buffer(output_key_buffer, handle.get_stream()); + } else if constexpr (std::is_same_v>) { + if constexpr (compressed) { + resize_dataframe_buffer( + 
output_key_buffer, size_dataframe_buffer(key_buffer), handle.get_stream()); + auto input_key_first = thrust::make_transform_iterator( + get_dataframe_buffer_begin(key_buffer), + cuda::proclaim_return_type( + [v_first = std::get<0>(vertex_range)] __device__(auto v_offset) { + return static_cast(v_first + v_offset); + })); + auto tmp_payload_buffer = allocate_dataframe_buffer( + size_dataframe_buffer(payload_buffer), handle.get_stream()); + auto input_pair_first = + thrust::make_zip_iterator(input_key_first, get_dataframe_buffer_begin(payload_buffer)); + auto output_pair_first = + thrust::make_zip_iterator(get_dataframe_buffer_begin(output_key_buffer), + get_dataframe_buffer_begin(tmp_payload_buffer)); + resize_dataframe_buffer( + output_key_buffer, + thrust::distance( + output_pair_first, + thrust::copy_if(handle.get_thrust_policy(), + input_pair_first, + input_pair_first + size_dataframe_buffer(key_buffer), + thrust::make_counting_iterator(size_t{0}), + output_pair_first, + cuda::proclaim_return_type( + [key_first = get_dataframe_buffer_begin(key_buffer), + invalid_key = to_thrust_optional(invalid_key)] __device__(size_t i) { + auto key = *(key_first + i); + if (invalid_key && (key == *invalid_key)) { + return false; + } else if ((i != 0) && (key == *(key_first + (i - 1)))) { + return false; + } else { + return true; + } + }))), + handle.get_stream()); + resize_dataframe_buffer( + tmp_payload_buffer, size_dataframe_buffer(output_key_buffer), handle.get_stream()); + payload_buffer = std::move(tmp_payload_buffer); + } else { + auto pair_first = thrust::make_zip_iterator(get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_begin(payload_buffer)); + resize_dataframe_buffer( + key_buffer, + thrust::distance( + pair_first, + thrust::remove_if(handle.get_thrust_policy(), + pair_first, + pair_first + size_dataframe_buffer(key_buffer), + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type( + [key_first = get_dataframe_buffer_begin(key_buffer), 
+ invalid_key = to_thrust_optional(invalid_key)] __device__(size_t i) { + auto key = *(key_first + i); + if (invalid_key && (key == *invalid_key)) { + return true; + } else if ((i != 0) && (key == *(key_first + (i - 1)))) { + return true; + } else { + return false; + } + }))), + handle.get_stream()); + resize_dataframe_buffer( + payload_buffer, size_dataframe_buffer(key_buffer), handle.get_stream()); + output_key_buffer = std::move(key_buffer); + } + shrink_to_fit_dataframe_buffer(output_key_buffer, handle.get_stream()); + shrink_to_fit_dataframe_buffer(payload_buffer, handle.get_stream()); + } else { + if (invalid_key) { + auto pair_first = thrust::make_zip_iterator(get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_begin(payload_buffer)); + resize_dataframe_buffer( + key_buffer, + thrust::distance(pair_first, + thrust::remove_if(handle.get_thrust_policy(), + pair_first, + pair_first + size_dataframe_buffer(key_buffer), + cuda::proclaim_return_type( + [invalid_key = *invalid_key] __device__(auto kv) { + auto key = thrust::get<0>(kv); + return key == invalid_key; + }))), + handle.get_stream()); + resize_dataframe_buffer( + payload_buffer, size_dataframe_buffer(key_buffer), handle.get_stream()); + } + auto num_uniques = + thrust::count_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(size_dataframe_buffer(key_buffer)), + is_first_in_run_t{ + get_dataframe_buffer_begin(key_buffer)}); + + auto new_key_buffer = allocate_dataframe_buffer(num_uniques, handle.get_stream()); + auto new_payload_buffer = + allocate_dataframe_buffer(num_uniques, handle.get_stream()); + + if constexpr (compressed) { + auto input_key_first = thrust::make_transform_iterator( + get_dataframe_buffer_begin(key_buffer), + cuda::proclaim_return_type( + [v_first = std::get<0>(vertex_range)] __device__(auto v_offset) { + return static_cast(v_first + v_offset); + })); + thrust::reduce_by_key(handle.get_thrust_policy(), + 
input_key_first, + input_key_first + size_dataframe_buffer(key_buffer), + get_optional_dataframe_buffer_begin(payload_buffer), + get_dataframe_buffer_begin(new_key_buffer), + get_dataframe_buffer_begin(new_payload_buffer), + thrust::equal_to(), + reduce_op); + } else { + thrust::reduce_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_end(key_buffer), + get_optional_dataframe_buffer_begin(payload_buffer), + get_dataframe_buffer_begin(new_key_buffer), + get_dataframe_buffer_begin(new_payload_buffer), + thrust::equal_to(), + reduce_op); + } + + output_key_buffer = std::move(new_key_buffer); + payload_buffer = std::move(new_payload_buffer); + } + + return std::make_tuple(std::move(output_key_buffer), std::move(payload_buffer)); +} + +template +std::conditional_t< + !std::is_same_v, + std::tuple( + 0, rmm::cuda_stream_view{})), + decltype(detail::allocate_optional_dataframe_buffer( + 0, rmm::cuda_stream_view{}))>, + decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{}))> +transform_reduce_v_frontier_outgoing_e_by_dst(raft::handle_t const& handle, + GraphViewType const& graph_view, + KeyBucketType const& frontier, + EdgeSrcValueInputWrapper edge_src_value_input, + EdgeDstValueInputWrapper edge_dst_value_input, + EdgeValueInputWrapper edge_value_input, + EdgeOp e_op, + ReduceOp reduce_op, + bool do_expensive_check = false) +{ + static_assert(!GraphViewType::is_storage_transposed, + "GraphViewType should support the push model."); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using key_t = typename KeyBucketType::key_type; + using payload_t = typename ReduceOp::value_type; + + if (do_expensive_check) { + // currently, nothing to do + } + + // 1. 
fill the buffer + + detail::transform_reduce_v_frontier_call_e_op_t + e_op_wrapper{e_op}; + + auto [key_buffer, payload_buffer] = + detail::extract_transform_v_frontier_e(handle, + graph_view, + frontier, + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op_wrapper, + do_expensive_check); + // 2. reduce the buffer + + std::vector vertex_partition_range_offsets{}; + if constexpr (GraphViewType::is_multi_gpu) { + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_rank = major_comm.get_rank(); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + auto const minor_comm_size = minor_comm.get_size(); + vertex_partition_range_offsets = std::vector(major_comm_size + 1); + for (int i = 0; i < major_comm_size; ++i) { + auto vertex_partition_id = + detail::compute_local_edge_partition_minor_range_vertex_partition_id_t{ + major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); + vertex_partition_range_offsets[i] = + graph_view.vertex_partition_range_first(vertex_partition_id); + } + vertex_partition_range_offsets.back() = graph_view.local_edge_partition_dst_range_last(); + } else { + vertex_partition_range_offsets = + std::vector{graph_view.local_edge_partition_dst_range_first(), + graph_view.local_edge_partition_dst_range_last()}; + } + std::conditional_t, std::tuple, std::byte /* dummy */> + vertex_range{}; + if constexpr (std::is_integral_v) { + vertex_range = std::make_tuple(vertex_partition_range_offsets.front(), + vertex_partition_range_offsets.back()); + } + std::tie(key_buffer, payload_buffer) = + detail::sort_and_reduce_buffer_elements( + handle, + std::move(key_buffer), + std::move(payload_buffer), + reduce_op, + vertex_range, + std::nullopt); + if constexpr (GraphViewType::is_multi_gpu) { + auto& major_comm = 
handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + if (major_comm_size > 1) { + size_t local_key_buffer_size = size_dataframe_buffer(key_buffer); + auto avg_key_buffer_size = + host_scalar_allreduce( + major_comm, local_key_buffer_size, raft::comms::op_t::SUM, handle.get_stream()) / + major_comm_size; + + rmm::device_uvector d_vertex_partition_range_offsets( + vertex_partition_range_offsets.size(), handle.get_stream()); + raft::update_device(d_vertex_partition_range_offsets.data(), + vertex_partition_range_offsets.data(), + vertex_partition_range_offsets.size(), + handle.get_stream()); + + constexpr bool try_compression = (sizeof(vertex_t) == 8) && std::is_same_v; + std::conditional_t + max_vertex_partition_size{}; + if constexpr (try_compression) { + for (int i = 0; i < major_comm_size; ++i) { + max_vertex_partition_size = + std::max(vertex_partition_range_offsets[i + 1] - vertex_partition_range_offsets[i], + max_vertex_partition_size); + } + } + + if constexpr (std::is_same_v && + std::is_same_v>) { + vertex_t min_vertex_partition_size = std::numeric_limits::max(); + for (int i = 0; i < major_comm_size; ++i) { + min_vertex_partition_size = + std::min(vertex_partition_range_offsets[i + 1] - vertex_partition_range_offsets[i], + min_vertex_partition_size); + } + + auto segment_offsets = graph_view.local_vertex_partition_segment_offsets(); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + if (segment_offsets && + (static_cast(avg_key_buffer_size) > + static_cast(graph_view.number_of_vertices() / comm_size) * + double{0.2})) { // duplicates expected for high in-degree vertices (and we assume + // correlation between in-degrees & out-degrees) // FIXME: we need + // a better criterion + size_t key_size{0}; + size_t payload_size{0}; + if constexpr (try_compression) { + if (max_vertex_partition_size <= std::numeric_limits::max()) { + key_size = sizeof(uint32_t); + } else { 
+ key_size = sizeof(key_t); + } + } else { + if constexpr (std::is_arithmetic_v) { + key_size = sizeof(key_t); + } else { + key_size = sum_thrust_tuple_element_sizes(); + } + } + if constexpr (!std::is_same_v) { + if constexpr (std::is_arithmetic_v) { + payload_size = sizeof(payload_t); + } else { + payload_size = sum_thrust_tuple_element_sizes(); + } + } + + int subgroup_size{}; + int num_gpus_per_node{}; + RAFT_CUDA_TRY(cudaGetDeviceCount(&num_gpus_per_node)); + if (comm_size <= num_gpus_per_node) { + subgroup_size = major_comm_size; + } else { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + subgroup_size = partition_manager::map_major_comm_to_gpu_row_comm + ? std::min(major_comm_size, num_gpus_per_node) + : std::max(num_gpus_per_node / minor_comm_size, int{1}); + } + + auto p2p_size_per_rank = avg_key_buffer_size * (key_size + payload_size); + auto p2p_size_per_node = p2p_size_per_rank * std::min(num_gpus_per_node, comm_size); + auto allreduce_size_per_node = p2p_size_per_node / 16 /* tuning parameter */; + auto allreduce_size_per_rank = + allreduce_size_per_node / (major_comm_size * (num_gpus_per_node / subgroup_size)); + + if (major_comm_size <= std::numeric_limits::max()) { // priority = uint8_t + std::tie(key_buffer, payload_buffer) = + filter_buffer_elements( + handle, + std::move(key_buffer), + std::move(payload_buffer), + raft::device_span(d_vertex_partition_range_offsets.data(), + d_vertex_partition_range_offsets.size()), + std::min(static_cast(allreduce_size_per_rank / sizeof(uint8_t)), + min_vertex_partition_size), + subgroup_size); + } else { // priority = uint32_t + std::tie(key_buffer, payload_buffer) = + filter_buffer_elements( + handle, + std::move(key_buffer), + std::move(payload_buffer), + raft::device_span(d_vertex_partition_range_offsets.data(), + d_vertex_partition_range_offsets.size()), + std::min(static_cast(allreduce_size_per_rank / 
sizeof(uint32_t)), + min_vertex_partition_size), + subgroup_size); + } + } + } + + rmm::device_uvector d_tx_buffer_last_boundaries(major_comm_size, handle.get_stream()); + auto key_v_first = + thrust_tuple_get_or_identity( + get_dataframe_buffer_begin(key_buffer)); + thrust::lower_bound(handle.get_thrust_policy(), + key_v_first, + key_v_first + size_dataframe_buffer(key_buffer), + d_vertex_partition_range_offsets.begin() + 1, + d_vertex_partition_range_offsets.end(), + d_tx_buffer_last_boundaries.begin()); + std::conditional_t>, + std::byte /* dummy */> + compressed_v_buffer{}; + if constexpr (try_compression) { + if (max_vertex_partition_size <= std::numeric_limits::max()) { + compressed_v_buffer = + rmm::device_uvector(size_dataframe_buffer(key_buffer), handle.get_stream()); + thrust::transform( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_end(key_buffer), + (*compressed_v_buffer).begin(), + cuda::proclaim_return_type( + [firsts = raft::device_span(d_vertex_partition_range_offsets.data(), + static_cast(major_comm_size)), + lasts = raft::device_span( + d_vertex_partition_range_offsets.data() + 1, + static_cast(major_comm_size))] __device__(auto v) { + auto major_comm_rank = thrust::distance( + lasts.begin(), thrust::upper_bound(thrust::seq, lasts.begin(), lasts.end(), v)); + return static_cast(v - firsts[major_comm_rank]); + })); + resize_dataframe_buffer(key_buffer, 0, handle.get_stream()); + shrink_to_fit_dataframe_buffer(key_buffer, handle.get_stream()); + } + } + std::vector h_tx_buffer_last_boundaries(d_tx_buffer_last_boundaries.size()); + raft::update_host(h_tx_buffer_last_boundaries.data(), + d_tx_buffer_last_boundaries.data(), + d_tx_buffer_last_boundaries.size(), + handle.get_stream()); + handle.sync_stream(); + std::vector tx_counts(h_tx_buffer_last_boundaries.size()); + std::adjacent_difference( + h_tx_buffer_last_boundaries.begin(), h_tx_buffer_last_boundaries.end(), tx_counts.begin()); + + size_t 
min_element_size{cache_line_size}; + if constexpr (std::is_same_v) { + if constexpr (try_compression) { + if (compressed_v_buffer) { + min_element_size = std::min(sizeof(uint32_t), min_element_size); + } else { + min_element_size = std::min(sizeof(key_t), min_element_size); + } + } else { + min_element_size = std::min(sizeof(key_t), min_element_size); + } + } else { + static_assert(is_thrust_tuple_of_arithmetic::value); + min_element_size = + std::min(cugraph::min_thrust_tuple_element_sizes(), min_element_size); + } + if constexpr (!std::is_same_v) { + if constexpr (std::is_arithmetic_v) { + min_element_size = std::min(sizeof(payload_t), min_element_size); + } else { + static_assert(is_thrust_tuple_of_arithmetic::value); + min_element_size = + std::min(cugraph::min_thrust_tuple_element_sizes(), min_element_size); + } + } + assert((cache_line_size % min_element_size) == 0); + auto alignment = cache_line_size / min_element_size; + std::optional, key_t>> + invalid_key{std::nullopt}; + + if (avg_key_buffer_size >= alignment * size_t{128} /* 128 tuning parameter */) { + if constexpr (std::is_same_v) { + if constexpr (try_compression) { + if (compressed_v_buffer) { + invalid_key = std::numeric_limits::max(); + } else { + invalid_key = invalid_vertex_id_v; + } + } else { + invalid_key = invalid_vertex_id_v; + } + } else { + invalid_key = key_t{}; + thrust::get<0>(*invalid_key) = invalid_vertex_id_v; + } + + if constexpr (try_compression) { + if (compressed_v_buffer) { + auto rx_compressed_v_buffer = + allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_compressed_v_buffer, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = shuffle_values(major_comm, + get_dataframe_buffer_begin(*compressed_v_buffer), + tx_counts, + alignment, + std::make_optional(std::get<1>(*invalid_key)), + handle.get_stream()); + compressed_v_buffer = std::move(rx_compressed_v_buffer); + } else { + auto rx_key_buffer = 
allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_key_buffer, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = shuffle_values(major_comm, + get_dataframe_buffer_begin(key_buffer), + tx_counts, + alignment, + std::make_optional(std::get<0>(*invalid_key)), + handle.get_stream()); + key_buffer = std::move(rx_key_buffer); + } + } else { + auto rx_key_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_key_buffer, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = shuffle_values(major_comm, + get_dataframe_buffer_begin(key_buffer), + tx_counts, + alignment, + invalid_key, + handle.get_stream()); + key_buffer = std::move(rx_key_buffer); + } + if constexpr (!std::is_same_v) { + auto rx_payload_buffer = + allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_payload_buffer, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = shuffle_values(major_comm, + get_dataframe_buffer_begin(payload_buffer), + tx_counts, + alignment, + std::nullopt, + handle.get_stream()); + payload_buffer = std::move(rx_payload_buffer); + } + } else { + if constexpr (try_compression) { + if (compressed_v_buffer) { + auto rx_compressed_v_buffer = + allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_compressed_v_buffer, std::ignore) = + shuffle_values(major_comm, + get_dataframe_buffer_begin(*compressed_v_buffer), + tx_counts, + handle.get_stream()); + compressed_v_buffer = std::move(rx_compressed_v_buffer); + } else { + auto rx_key_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_key_buffer, std::ignore) = shuffle_values( + major_comm, get_dataframe_buffer_begin(key_buffer), tx_counts, handle.get_stream()); + key_buffer = std::move(rx_key_buffer); + } + } else { + auto rx_key_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + 
std::tie(rx_key_buffer, std::ignore) = shuffle_values( + major_comm, get_dataframe_buffer_begin(key_buffer), tx_counts, handle.get_stream()); + key_buffer = std::move(rx_key_buffer); + } + + if constexpr (!std::is_same_v) { + auto rx_payload_buffer = + allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_payload_buffer, std::ignore) = shuffle_values( + major_comm, get_dataframe_buffer_begin(payload_buffer), tx_counts, handle.get_stream()); + payload_buffer = std::move(rx_payload_buffer); + } + } + + if constexpr (std::is_integral_v) { + vertex_range = std::make_tuple(graph_view.local_vertex_partition_range_first(), + graph_view.local_vertex_partition_range_last()); + } + if constexpr (try_compression) { + if (compressed_v_buffer) { + std::tie(key_buffer, payload_buffer) = + detail::sort_and_reduce_buffer_elements( + handle, + std::move(*compressed_v_buffer), + std::move(payload_buffer), + reduce_op, + vertex_range, + invalid_key ? std::make_optional(std::get<1>(*invalid_key)) : std::nullopt); + } else { + std::tie(key_buffer, payload_buffer) = + detail::sort_and_reduce_buffer_elements( + handle, + std::move(key_buffer), + std::move(payload_buffer), + reduce_op, + vertex_range, + invalid_key ? 
std::make_optional(std::get<0>(*invalid_key)) : std::nullopt); + } + } else { + std::tie(key_buffer, payload_buffer) = + detail::sort_and_reduce_buffer_elements( + handle, + std::move(key_buffer), + std::move(payload_buffer), + reduce_op, + vertex_range, + invalid_key); + } + } + } + + if constexpr (!std::is_same_v) { + return std::make_tuple(std::move(key_buffer), std::move(payload_buffer)); + } else { + return std::move(key_buffer); + } +} + +} // namespace detail + +template +size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle, + GraphViewType const& graph_view, + KeyBucketType const& frontier) +{ + static_assert(!GraphViewType::is_storage_transposed, + "GraphViewType should support the push model."); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using key_t = typename KeyBucketType::key_type; + + size_t ret{0}; + + auto local_frontier_vertex_first = + thrust_tuple_get_or_identity(frontier.begin()); + + std::vector local_frontier_sizes{}; + if constexpr (GraphViewType::is_multi_gpu) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + local_frontier_sizes = host_scalar_allgather(minor_comm, frontier.size(), handle.get_stream()); + } else { + local_frontier_sizes = std::vector{static_cast(frontier.size())}; + } + + auto edge_mask_view = graph_view.edge_mask_view(); + + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(i)); + auto edge_partition_e_mask = + edge_mask_view + ? 
thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : thrust::nullopt; + + if constexpr (GraphViewType::is_multi_gpu) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + + rmm::device_uvector edge_partition_frontier_vertices(local_frontier_sizes[i], + handle.get_stream()); + device_bcast(minor_comm, + local_frontier_vertex_first, + edge_partition_frontier_vertices.data(), + local_frontier_sizes[i], + static_cast(i), + handle.get_stream()); + + if (edge_partition_e_mask) { + ret += + edge_partition.compute_number_of_edges_with_mask((*edge_partition_e_mask).value_first(), + edge_partition_frontier_vertices.begin(), + edge_partition_frontier_vertices.end(), + handle.get_stream()); + } else { + ret += edge_partition.compute_number_of_edges(edge_partition_frontier_vertices.begin(), + edge_partition_frontier_vertices.end(), + handle.get_stream()); + } + } else { + assert(i == 0); + if (edge_partition_e_mask) { + ret += edge_partition.compute_number_of_edges_with_mask( + (*edge_partition_e_mask).value_first(), + local_frontier_vertex_first, + local_frontier_vertex_first + frontier.size(), + handle.get_stream()); + } else { + ret += edge_partition.compute_number_of_edges(local_frontier_vertex_first, + local_frontier_vertex_first + frontier.size(), + handle.get_stream()); + } + } + } + + return ret; +} + +/** + * @brief Iterate over outgoing edges from the current vertex frontier and reduce valid edge functor + * outputs by (tagged-)destination ID. + * + * Edge functor outputs are thrust::optional objects and invalid if thrust::nullopt. Vertices are + * assumed to be tagged if KeyBucketType::key_type is a tuple of a vertex type and a tag + * type (KeyBucketType::key_type is identical to a vertex type otherwise). + * + * @tparam GraphViewType Type of the passed non-owning graph object. 
+ * @tparam KeyBucketType Type of the vertex frontier bucket class which abstracts the + * current (tagged-)vertex frontier. + * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. + * @tparam EdgeDstValueInputWrapper Type of the wrapper for edge destination property values. + * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. + * @tparam EdgeOp Type of the quinary edge operator. + * @tparam ReduceOp Type of the binary reduction operator. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param frontier KeyBucketType class object for the current vertex frontier. + * @param edge_src_value_input Wrapper used to access source input property values (for the edge + * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() + * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() + * (if @p e_op does not access source property values). Use update_edge_src_property to fill the + * wrapper. + * @param edge_dst_value_input Wrapper used to access destination input property values (for the + * edge destinations assigned to this process in multi-GPU). Use either + * cugraph::edge_dst_property_t::view() (if @p e_op needs to access destination property values) or + * cugraph::edge_dst_dummy_property_t::view() (if @p e_op does not access destination property + * values). Use update_edge_dst_property to fill the wrapper. + * @param edge_value_input Wrapper used to access edge input property values (for the edges assigned + * to this process in multi-GPU). Use either cugraph::edge_property_t::view() (if @p e_op needs to + * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not + * access edge property values). 
+ * @param e_op Quinary operator takes edge (tagged-)source, edge destination, property values for + * the source, destination, and edge and returns 1) thrust::nullopt (if invalid and to be + * discarded); 2) dummy (but valid) thrust::optional object (e.g. + * thrust::optional{std::byte{0}}, if vertices are not tagged and ReduceOp::value_type is + * void); 3) a tag (if vertices are tagged and ReduceOp::value_type is void); 4) a value to be + * reduced using the @p reduce_op (if vertices are not tagged and ReduceOp::value_type is not void); + * or 5) a tuple of a tag and a value to be reduced (if vertices are tagged and ReduceOp::value_type + * is not void). + * @param reduce_op Binary operator that takes two input arguments and reduces the two values to one. + * There are pre-defined reduction operators in prims/reduce_op.cuh. It is + * recommended to use the pre-defined reduction operators whenever possible as the current (and + * future) implementations of graph primitives may check whether @p ReduceOp is a known type (or has + * known member variables) to take a more optimized code path. See the documentation in the + * reduce_op.cuh file for instructions on writing custom reduction operators. + * @return Tuple of key values and payload values (if ReduceOp::value_type is not void) or just key + * values (if ReduceOp::value_type is void). Keys in the return values are sorted in ascending order + * using a vertex ID as the primary key and a tag (if relevant) as the secondary key. 
+ */ +template +std::conditional_t< + !std::is_same_v, + std::tuple( + 0, rmm::cuda_stream_view{})), + decltype(detail::allocate_optional_dataframe_buffer( + 0, rmm::cuda_stream_view{}))>, + decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{}))> +transform_reduce_v_frontier_outgoing_e_by_dst(raft::handle_t const& handle, + GraphViewType const& graph_view, + KeyBucketType const& frontier, + EdgeSrcValueInputWrapper edge_src_value_input, + EdgeDstValueInputWrapper edge_dst_value_input, + EdgeValueInputWrapper edge_value_input, + EdgeOp e_op, + ReduceOp reduce_op, + bool do_expensive_check = false) +{ + return detail::transform_reduce_v_frontier_outgoing_e_by_dst(handle, + graph_view, + frontier, + edge_src_value_input, + edge_dst_value_input, + edge_value_input, + e_op, + reduce_op, + do_expensive_check); +} + +} // namespace cugraph diff --git a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh deleted file mode 100644 index e58ab08fa97..00000000000 --- a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh +++ /dev/null @@ -1,585 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include "detail/graph_partition_utils.cuh" -#include "prims/detail/extract_transform_v_frontier_e.cuh" -#include "prims/property_op_utils.cuh" -#include "prims/reduce_op.cuh" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace cugraph { - -namespace detail { - -int32_t constexpr update_v_frontier_from_outgoing_e_kernel_block_size = 512; - -template -struct transform_reduce_v_frontier_call_e_op_t { - EdgeOp e_op{}; - - __device__ thrust::optional< - std::conditional_t && !std::is_same_v, - thrust::tuple, - std::conditional_t, key_t, payload_t>>> - operator()(key_t key, vertex_t dst, src_value_t sv, dst_value_t dv, e_value_t ev) const - { - auto e_op_result = e_op(key, dst, sv, dv, ev); - if (e_op_result.has_value()) { - auto reduce_by = reduce_by_src ? 
thrust_tuple_get_or_identity(key) : dst; - if constexpr (std::is_same_v && std::is_same_v) { - return reduce_by; - } else if constexpr (std::is_same_v && !std::is_same_v) { - return thrust::make_tuple(reduce_by, *e_op_result); - } else if constexpr (!std::is_same_v && std::is_same_v) { - return thrust::make_tuple(reduce_by, *e_op_result); - } else { - return thrust::make_tuple(thrust::make_tuple(reduce_by, thrust::get<0>(*e_op_result)), - thrust::get<1>(*e_op_result)); - } - } else { - return thrust::nullopt; - } - } -}; - -template -auto sort_and_reduce_buffer_elements( - raft::handle_t const& handle, - decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{}))&& key_buffer, - decltype(allocate_optional_dataframe_buffer(0, - rmm::cuda_stream_view{}))&& payload_buffer, - ReduceOp reduce_op) -{ - if constexpr (std::is_same_v) { - thrust::sort(handle.get_thrust_policy(), - get_dataframe_buffer_begin(key_buffer), - get_dataframe_buffer_end(key_buffer)); - } else { - thrust::sort_by_key(handle.get_thrust_policy(), - get_dataframe_buffer_begin(key_buffer), - get_dataframe_buffer_end(key_buffer), - get_optional_dataframe_buffer_begin(payload_buffer)); - } - - if constexpr (std::is_same_v) { - auto it = thrust::unique(handle.get_thrust_policy(), - get_dataframe_buffer_begin(key_buffer), - get_dataframe_buffer_end(key_buffer)); - resize_dataframe_buffer( - key_buffer, - static_cast(thrust::distance(get_dataframe_buffer_begin(key_buffer), it)), - handle.get_stream()); - shrink_to_fit_dataframe_buffer(key_buffer, handle.get_stream()); - } else if constexpr (std::is_same_v>) { - auto it = thrust::unique_by_key(handle.get_thrust_policy(), - get_dataframe_buffer_begin(key_buffer), - get_dataframe_buffer_end(key_buffer), - get_optional_dataframe_buffer_begin(payload_buffer)); - resize_dataframe_buffer(key_buffer, - static_cast(thrust::distance( - get_dataframe_buffer_begin(key_buffer), thrust::get<0>(it))), - handle.get_stream()); - resize_dataframe_buffer(payload_buffer, 
size_dataframe_buffer(key_buffer), handle.get_stream()); - shrink_to_fit_dataframe_buffer(key_buffer, handle.get_stream()); - shrink_to_fit_dataframe_buffer(payload_buffer, handle.get_stream()); - } else { - auto num_uniques = - thrust::count_if(handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(size_dataframe_buffer(key_buffer)), - is_first_in_run_t{ - get_dataframe_buffer_begin(key_buffer)}); - - auto new_key_buffer = allocate_dataframe_buffer(num_uniques, handle.get_stream()); - auto new_payload_buffer = - allocate_dataframe_buffer(num_uniques, handle.get_stream()); - - thrust::reduce_by_key(handle.get_thrust_policy(), - get_dataframe_buffer_begin(key_buffer), - get_dataframe_buffer_end(key_buffer), - get_optional_dataframe_buffer_begin(payload_buffer), - get_dataframe_buffer_begin(new_key_buffer), - get_dataframe_buffer_begin(new_payload_buffer), - thrust::equal_to(), - reduce_op); - - key_buffer = std::move(new_key_buffer); - payload_buffer = std::move(new_payload_buffer); - } - - return std::make_tuple(std::move(key_buffer), std::move(payload_buffer)); -} - -template -std::conditional_t< - !std::is_same_v, - std::tuple( - 0, rmm::cuda_stream_view{})), - decltype(detail::allocate_optional_dataframe_buffer( - 0, rmm::cuda_stream_view{}))>, - decltype(allocate_dataframe_buffer( - 0, rmm::cuda_stream_view{}))> -transform_reduce_v_frontier_outgoing_e_by_src_dst(raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexFrontierBucketType const& frontier, - EdgeSrcValueInputWrapper edge_src_value_input, - EdgeDstValueInputWrapper edge_dst_value_input, - EdgeValueInputWrapper edge_value_input, - EdgeOp e_op, - ReduceOp reduce_op, - bool do_expensive_check = false) -{ - static_assert(!GraphViewType::is_storage_transposed, - "GraphViewType should support the push model."); - - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using key_t = 
typename VertexFrontierBucketType::key_type; - using payload_t = typename ReduceOp::value_type; - - if (do_expensive_check) { - // currently, nothing to do - } - - // 1. fill the buffer - - detail::transform_reduce_v_frontier_call_e_op_t - e_op_wrapper{e_op}; - - bool constexpr max_one_e_per_frontier_key = - reduce_by_src && std::is_same_v>; - auto [key_buffer, payload_buffer] = - detail::extract_transform_v_frontier_e( - handle, - graph_view, - frontier, - edge_src_value_input, - edge_dst_value_input, - edge_value_input, - e_op_wrapper, - do_expensive_check); - - // 2. reduce the buffer - - std::tie(key_buffer, payload_buffer) = - detail::sort_and_reduce_buffer_elements( - handle, std::move(key_buffer), std::move(payload_buffer), reduce_op); - if constexpr (GraphViewType::is_multi_gpu) { - // FIXME: this step is unnecessary if major_comm_size== 1 - auto& comm = handle.get_comms(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_rank = major_comm.get_rank(); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_rank = minor_comm.get_rank(); - auto const minor_comm_size = minor_comm.get_size(); - - std::vector h_vertex_lasts(reduce_by_src ? minor_comm_size : major_comm_size); - for (size_t i = 0; i < h_vertex_lasts.size(); ++i) { - auto vertex_partition_id = - reduce_by_src - ? 
detail::compute_local_edge_partition_major_range_vertex_partition_id_t{major_comm_size, - minor_comm_size, - major_comm_rank, - minor_comm_rank}( - i) - : detail::compute_local_edge_partition_minor_range_vertex_partition_id_t{ - major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); - h_vertex_lasts[i] = graph_view.vertex_partition_range_last(vertex_partition_id); - } - - rmm::device_uvector d_vertex_lasts(h_vertex_lasts.size(), handle.get_stream()); - raft::update_device( - d_vertex_lasts.data(), h_vertex_lasts.data(), h_vertex_lasts.size(), handle.get_stream()); - rmm::device_uvector d_tx_buffer_last_boundaries(d_vertex_lasts.size(), - handle.get_stream()); - auto reduce_by_first = - thrust_tuple_get_or_identity( - get_dataframe_buffer_begin(key_buffer)); - thrust::lower_bound(handle.get_thrust_policy(), - reduce_by_first, - reduce_by_first + size_dataframe_buffer(key_buffer), - d_vertex_lasts.begin(), - d_vertex_lasts.end(), - d_tx_buffer_last_boundaries.begin()); - std::vector h_tx_buffer_last_boundaries(d_tx_buffer_last_boundaries.size()); - raft::update_host(h_tx_buffer_last_boundaries.data(), - d_tx_buffer_last_boundaries.data(), - d_tx_buffer_last_boundaries.size(), - handle.get_stream()); - handle.sync_stream(); - std::vector tx_counts(h_tx_buffer_last_boundaries.size()); - std::adjacent_difference( - h_tx_buffer_last_boundaries.begin(), h_tx_buffer_last_boundaries.end(), tx_counts.begin()); - - auto rx_key_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); - std::tie(rx_key_buffer, std::ignore) = shuffle_values(reduce_by_src ? minor_comm : major_comm, - get_dataframe_buffer_begin(key_buffer), - tx_counts, - handle.get_stream()); - key_buffer = std::move(rx_key_buffer); - - if constexpr (!std::is_same_v) { - auto rx_payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); - std::tie(rx_payload_buffer, std::ignore) = - shuffle_values(reduce_by_src ? 
minor_comm : major_comm, - get_dataframe_buffer_begin(payload_buffer), - tx_counts, - handle.get_stream()); - payload_buffer = std::move(rx_payload_buffer); - } - - std::tie(key_buffer, payload_buffer) = - detail::sort_and_reduce_buffer_elements( - handle, std::move(key_buffer), std::move(payload_buffer), reduce_op); - } - - if constexpr (!std::is_same_v) { - return std::make_tuple(std::move(key_buffer), std::move(payload_buffer)); - } else { - return std::move(key_buffer); - } -} - -} // namespace detail - -template -size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexFrontierBucketType const& frontier) -{ - static_assert(!GraphViewType::is_storage_transposed, - "GraphViewType should support the push model."); - - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using key_t = typename VertexFrontierBucketType::key_type; - - size_t ret{0}; - - auto local_frontier_vertex_first = - thrust_tuple_get_or_identity(frontier.begin()); - - std::vector local_frontier_sizes{}; - if constexpr (GraphViewType::is_multi_gpu) { - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - local_frontier_sizes = host_scalar_allgather(minor_comm, frontier.size(), handle.get_stream()); - } else { - local_frontier_sizes = std::vector{static_cast(frontier.size())}; - } - - auto edge_mask_view = graph_view.edge_mask_view(); - - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { - auto edge_partition = - edge_partition_device_view_t( - graph_view.local_edge_partition_view(i)); - auto edge_partition_e_mask = - edge_mask_view - ? 
thrust::make_optional< - detail::edge_partition_edge_property_device_view_t>( - *edge_mask_view, i) - : thrust::nullopt; - - if constexpr (GraphViewType::is_multi_gpu) { - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_rank = minor_comm.get_rank(); - - rmm::device_uvector edge_partition_frontier_vertices(local_frontier_sizes[i], - handle.get_stream()); - device_bcast(minor_comm, - local_frontier_vertex_first, - edge_partition_frontier_vertices.data(), - local_frontier_sizes[i], - static_cast(i), - handle.get_stream()); - - if (edge_partition_e_mask) { - ret += - edge_partition.compute_number_of_edges_with_mask((*edge_partition_e_mask).value_first(), - edge_partition_frontier_vertices.begin(), - edge_partition_frontier_vertices.end(), - handle.get_stream()); - } else { - ret += edge_partition.compute_number_of_edges(edge_partition_frontier_vertices.begin(), - edge_partition_frontier_vertices.end(), - handle.get_stream()); - } - } else { - assert(i == 0); - if (edge_partition_e_mask) { - ret += edge_partition.compute_number_of_edges_with_mask( - (*edge_partition_e_mask).value_first(), - local_frontier_vertex_first, - local_frontier_vertex_first + frontier.size(), - handle.get_stream()); - } else { - ret += edge_partition.compute_number_of_edges(local_frontier_vertex_first, - local_frontier_vertex_first + frontier.size(), - handle.get_stream()); - } - } - } - - return ret; -} - -/** - * @brief Iterate over outgoing edges from the current vertex frontier and reduce valid edge functor - * outputs by (tagged-)source ID. - * - * Edge functor outputs are thrust::optional objects and invalid if thrust::nullopt. Vertices are - * assumed to be tagged if VertexFrontierBucketType::key_type is a tuple of a vertex type and a tag - * type (VertexFrontierBucketType::key_type is identical to a vertex type otherwise). - * - * @tparam GraphViewType Type of the passed non-owning graph object. 
- * @tparam VertexFrontierBucketType Type of the vertex frontier bucket class which abstracts the - * current (tagged-)vertex frontier. - * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. - * @tparam EdgeDstValueInputWrapper Type of the wrapper for edge destination property values. - * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. - * @tparam EdgeOp Type of the quinary edge operator. - * @tparam ReduceOp Type of the binary reduction operator. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @param frontier VertexFrontierBucketType class object for the current vertex frontier. - * @param edge_src_value_input Wrapper used to access source input property values (for the edge - * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() - * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() - * (if @p e_op does not access source property values). Use update_edge_src_property to fill the - * wrapper. - * @param edge_dst_value_input Wrapper used to access destination input property values (for the - * edge destinations assigned to this process in multi-GPU). Use either - * cugraph::edge_dst_property_t::view() (if @p e_op needs to access destination property values) or - * cugraph::edge_dst_dummy_property_t::view() (if @p e_op does not access destination property - * values). Use update_edge_dst_property to fill the wrapper. - * @param edge_value_input Wrapper used to access edge input property values (for the edges assigned - * to this process in multi-GPU). Use either cugraph::edge_property_t::view() (if @p e_op needs to - * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not - * access edge property values). 
- * @param e_op Quinary operator takes edge (tagged-)source, edge destination, property values for - * the source, destination, and edge and returns 1) thrust::nullopt (if invalid and to be - * discarded); 2) dummy (but valid) thrust::optional object (e.g. - * thrust::optional{std::byte{0}}, if vertices are not tagged and ReduceOp::value_type is - * void); 3) a tag (if vertices are tagged and ReduceOp::value_type is void); 4) a value to be - * reduced using the @p reduce_op (if vertices are not tagged and ReduceOp::value_type is not void); - * or 5) a tuple of a tag and a value to be reduced (if vertices are tagged and ReduceOp::value_type - * is not void). - * @param reduce_op Binary operator that takes two input arguments and reduce the two values to one. - * There are pre-defined reduction operators in prims/reduce_op.cuh. It is - * recommended to use the pre-defined reduction operators whenever possible as the current (and - * future) implementations of graph primitives may check whether @p ReduceOp is a known type (or has - * known member variables) to take a more optimized code path. See the documentation in the - * reduce_op.cuh file for instructions on writing custom reduction operators. - * @return Tuple of key values and payload values (if ReduceOp::value_type is not void) or just key - * values (if ReduceOp::value_type is void). Keys in the return values are sorted in ascending order - * using a vertex ID as the primary key and a tag (if relevant) as the secondary key. 
- */ -template -std::conditional_t< - !std::is_same_v, - std::tuple( - 0, rmm::cuda_stream_view{})), - decltype(detail::allocate_optional_dataframe_buffer( - 0, rmm::cuda_stream_view{}))>, - decltype(allocate_dataframe_buffer( - 0, rmm::cuda_stream_view{}))> -transform_reduce_v_frontier_outgoing_e_by_src(raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexFrontierBucketType const& frontier, - EdgeSrcValueInputWrapper edge_src_value_input, - EdgeDstValueInputWrapper edge_dst_value_input, - EdgeValueInputWrapper edge_value_input, - EdgeOp e_op, - ReduceOp reduce_op, - bool do_expensive_check = false) -{ - return detail::transform_reduce_v_frontier_outgoing_e_by_src_dst(handle, - graph_view, - frontier, - edge_src_value_input, - edge_dst_value_input, - edge_value_input, - e_op, - reduce_op, - do_expensive_check); -} - -/** - * @brief Iterate over outgoing edges from the current vertex frontier and reduce valid edge functor - * outputs by (tagged-)destination ID. - * - * Edge functor outputs are thrust::optional objects and invalid if thrust::nullopt. Vertices are - * assumed to be tagged if VertexFrontierBucketType::key_type is a tuple of a vertex type and a tag - * type (VertexFrontierBucketType::key_type is identical to a vertex type otherwise). - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam VertexFrontierBucketType Type of the vertex frontier bucket class which abstracts the - * current (tagged-)vertex frontier. - * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. - * @tparam EdgeDstValueInputWrapper Type of the wrapper for edge destination property values. - * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. - * @tparam EdgeOp Type of the quinary edge operator. - * @tparam ReduceOp Type of the binary reduction operator. - * @param handle RAFT handle object to encapsulate resources (e.g. 
CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @param frontier VertexFrontierBucketType class object for the current vertex frontier. - * @param edge_src_value_input Wrapper used to access source input property values (for the edge - * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() - * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() - * (if @p e_op does not access source property values). Use update_edge_src_property to fill the - * wrapper. - * @param edge_dst_value_input Wrapper used to access destination input property values (for the - * edge destinations assigned to this process in multi-GPU). Use either - * cugraph::edge_dst_property_t::view() (if @p e_op needs to access destination property values) or - * cugraph::edge_dst_dummy_property_t::view() (if @p e_op does not access destination property - * values). Use update_edge_dst_property to fill the wrapper. - * @param edge_value_input Wrapper used to access edge input property values (for the edges assigned - * to this process in multi-GPU). Use either cugraph::edge_property_t::view() (if @p e_op needs to - * access edge property values) or cugraph::edge_dummy_property_t::view() (if @p e_op does not - * access edge property values). - * @param e_op Quinary operator takes edge (tagged-)source, edge destination, property values for - * the source, destination, and edge and returns 1) thrust::nullopt (if invalid and to be - * discarded); 2) dummy (but valid) thrust::optional object (e.g. 
- * thrust::optional{std::byte{0}}, if vertices are not tagged and ReduceOp::value_type is - * void); 3) a tag (if vertices are tagged and ReduceOp::value_type is void); 4) a value to be - * reduced using the @p reduce_op (if vertices are not tagged and ReduceOp::value_type is not void); - * or 5) a tuple of a tag and a value to be reduced (if vertices are tagged and ReduceOp::value_type - * is not void). - * @param reduce_op Binary operator that takes two input arguments and reduce the two values to one. - * There are pre-defined reduction operators in prims/reduce_op.cuh. It is - * recommended to use the pre-defined reduction operators whenever possible as the current (and - * future) implementations of graph primitives may check whether @p ReduceOp is a known type (or has - * known member variables) to take a more optimized code path. See the documentation in the - * reduce_op.cuh file for instructions on writing custom reduction operators. - * @return Tuple of key values and payload values (if ReduceOp::value_type is not void) or just key - * values (if ReduceOp::value_type is void). Keys in the return values are sorted in ascending order - * using a vertex ID as the primary key and a tag (if relevant) as the secondary key. 
- */ -template -std::conditional_t< - !std::is_same_v, - std::tuple( - 0, rmm::cuda_stream_view{})), - decltype(detail::allocate_optional_dataframe_buffer( - 0, rmm::cuda_stream_view{}))>, - decltype(allocate_dataframe_buffer( - 0, rmm::cuda_stream_view{}))> -transform_reduce_v_frontier_outgoing_e_by_dst(raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexFrontierBucketType const& frontier, - EdgeSrcValueInputWrapper edge_src_value_input, - EdgeDstValueInputWrapper edge_dst_value_input, - EdgeValueInputWrapper edge_value_input, - EdgeOp e_op, - ReduceOp reduce_op, - bool do_expensive_check = false) -{ - return detail::transform_reduce_v_frontier_outgoing_e_by_src_dst(handle, - graph_view, - frontier, - edge_src_value_input, - edge_dst_value_input, - edge_value_input, - e_op, - reduce_op, - do_expensive_check); -} - -} // namespace cugraph diff --git a/cpp/src/prims/update_edge_src_dst_property.cuh b/cpp/src/prims/update_edge_src_dst_property.cuh index 1bfdc23c66d..2f842f710ca 100644 --- a/cpp/src/prims/update_edge_src_dst_property.cuh +++ b/cpp/src/prims/update_edge_src_dst_property.cuh @@ -16,6 +16,7 @@ #pragma once #include "detail/graph_partition_utils.cuh" +#include "prims/vertex_frontier.cuh" #include #include @@ -265,8 +266,8 @@ template void update_edge_major_property(raft::handle_t const& handle, GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, + VertexIterator sorted_unique_vertex_first, + VertexIterator sorted_unique_vertex_last, VertexPropertyInputIterator vertex_property_input_first, EdgeMajorPropertyOutputWrapper edge_major_property_output) { @@ -288,12 +289,12 @@ void update_edge_major_property(raft::handle_t const& handle, auto const minor_comm_rank = minor_comm.get_rank(); auto const minor_comm_size = minor_comm.get_size(); - auto rx_counts = - host_scalar_allgather(minor_comm, - static_cast(thrust::distance(vertex_first, vertex_last)), - handle.get_stream()); - auto max_rx_size = - 
std::reduce(rx_counts.begin(), rx_counts.end(), size_t{0}, [](auto lhs, auto rhs) { + auto local_v_list_sizes = host_scalar_allgather( + minor_comm, + static_cast(thrust::distance(sorted_unique_vertex_first, sorted_unique_vertex_last)), + handle.get_stream()); + auto max_rx_size = std::reduce( + local_v_list_sizes.begin(), local_v_list_sizes.end(), size_t{0}, [](auto lhs, auto rhs) { return std::max(lhs, rhs); }); rmm::device_uvector rx_vertices(max_rx_size, handle.get_stream()); @@ -317,7 +318,7 @@ void update_edge_major_property(raft::handle_t const& handle, graph_view.local_vertex_partition_view()); if constexpr (contains_packed_bool_element) { auto bool_first = thrust::make_transform_iterator( - vertex_first, + sorted_unique_vertex_first, cuda::proclaim_return_type([vertex_property_input_first, vertex_partition] __device__(auto v) { auto v_offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); @@ -325,34 +326,41 @@ void update_edge_major_property(raft::handle_t const& handle, *(vertex_property_input_first + packed_bool_offset(v_offset)) & packed_bool_mask(v_offset)); })); - pack_bools(handle, - bool_first, - bool_first + thrust::distance(vertex_first, vertex_last), - rx_value_first); + pack_bools( + handle, + bool_first, + bool_first + thrust::distance(sorted_unique_vertex_first, sorted_unique_vertex_last), + rx_value_first); } else { auto map_first = thrust::make_transform_iterator( - vertex_first, + sorted_unique_vertex_first, cuda::proclaim_return_type([vertex_partition] __device__(auto v) { return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); })); // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a // permutation iterator (and directly gathers to the internal buffer) - thrust::gather(handle.get_thrust_policy(), - map_first, - map_first + thrust::distance(vertex_first, vertex_last), - vertex_property_input_first, - rx_value_first); + thrust::gather( + 
handle.get_thrust_policy(), + map_first, + map_first + thrust::distance(sorted_unique_vertex_first, sorted_unique_vertex_last), + vertex_property_input_first, + rx_value_first); } } // FIXME: these broadcast operations can be placed between ncclGroupStart() and // ncclGroupEnd() - device_bcast( - minor_comm, vertex_first, rx_vertices.begin(), rx_counts[i], i, handle.get_stream()); + device_bcast(minor_comm, + sorted_unique_vertex_first, + rx_vertices.begin(), + local_v_list_sizes[i], + i, + handle.get_stream()); device_bcast(minor_comm, rx_value_first, rx_value_first, - contains_packed_bool_element ? packed_bool_size(rx_counts[i]) : rx_counts[i], + contains_packed_bool_element ? packed_bool_size(local_v_list_sizes[i]) + : local_v_list_sizes[i], i, handle.get_stream()); @@ -360,7 +368,7 @@ void update_edge_major_property(raft::handle_t const& handle, thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(rx_counts[i]), + thrust::make_counting_iterator(local_v_list_sizes[i]), [rx_vertex_first = rx_vertices.begin(), rx_value_first, edge_partition_key_first = ((*edge_partition_keys)[i]).begin(), @@ -386,7 +394,7 @@ void update_edge_major_property(raft::handle_t const& handle, thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(vertex_t{0}), - thrust::make_counting_iterator(static_cast(rx_counts[i])), + thrust::make_counting_iterator(static_cast(local_v_list_sizes[i])), [edge_partition, rx_vertex_first = rx_vertices.begin(), rx_value_first, @@ -407,7 +415,7 @@ void update_edge_major_property(raft::handle_t const& handle, // directly scatters from the internal buffer) thrust::scatter(handle.get_thrust_policy(), rx_value_first, - rx_value_first + rx_counts[i], + rx_value_first + local_v_list_sizes[i], map_first, edge_partition_value_firsts[i]); } @@ -420,20 +428,22 @@ void update_edge_major_property(raft::handle_t const& handle, assert(edge_partition_value_firsts.size() == 
size_t{1}); if constexpr (contains_packed_bool_element) { thrust::for_each(handle.get_thrust_policy(), - vertex_first, - vertex_last, + sorted_unique_vertex_first, + sorted_unique_vertex_last, [vertex_property_input_first, output_value_first = edge_partition_value_firsts[0]] __device__(auto v) { bool val = static_cast(*(vertex_property_input_first + v)); packed_bool_atomic_set(output_value_first, v, val); }); } else { - auto val_first = thrust::make_permutation_iterator(vertex_property_input_first, vertex_first); - thrust::scatter(handle.get_thrust_policy(), - val_first, - val_first + thrust::distance(vertex_first, vertex_last), - vertex_first, - edge_partition_value_firsts[0]); + auto val_first = + thrust::make_permutation_iterator(vertex_property_input_first, sorted_unique_vertex_first); + thrust::scatter( + handle.get_thrust_policy(), + val_first, + val_first + thrust::distance(sorted_unique_vertex_first, sorted_unique_vertex_last), + sorted_unique_vertex_first, + edge_partition_value_firsts[0]); } } } @@ -455,13 +465,11 @@ void update_edge_minor_property(raft::handle_t const& handle, auto edge_partition_value_first = edge_minor_property_output.value_first(); if constexpr (GraphViewType::is_multi_gpu) { - using vertex_t = typename GraphViewType::vertex_type; - using bcast_buffer_type = - decltype(allocate_dataframe_buffer< - std::conditional_t>( - size_t{0}, handle.get_stream())); + using vertex_t = typename GraphViewType::vertex_type; + using bcast_buffer_type = dataframe_buffer_type_t< + std::conditional_t>; auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); @@ -487,8 +495,8 @@ void update_edge_minor_property(raft::handle_t const& handle, (static_cast(graph_view.compute_number_of_edges(handle) / comm_size) * sizeof(vertex_t)) / std::max(bcast_size, size_t{1}); - num_concurrent_bcasts = std::max(num_concurrent_bcasts, size_t{1}); - num_concurrent_bcasts = std::min(num_concurrent_bcasts, static_cast(major_comm_size)); + 
num_concurrent_bcasts = + std::min(std::max(num_concurrent_bcasts, size_t{1}), static_cast(major_comm_size)); auto num_rounds = (static_cast(major_comm_size) + num_concurrent_bcasts - size_t{1}) / num_concurrent_bcasts; @@ -532,15 +540,17 @@ void update_edge_minor_property(raft::handle_t const& handle, *(graph_view.local_sorted_unique_edge_dst_vertex_partition_offsets()); } } else { - std::vector rx_counts(major_comm_size, size_t{0}); + std::vector local_v_list_sizes(major_comm_size, size_t{0}); for (int i = 0; i < major_comm_size; ++i) { auto minor_range_vertex_partition_id = compute_local_edge_partition_minor_range_vertex_partition_id_t{ major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank}(i); - rx_counts[i] = graph_view.vertex_partition_range_size(minor_range_vertex_partition_id); + local_v_list_sizes[i] = + graph_view.vertex_partition_range_size(minor_range_vertex_partition_id); } std::vector rx_displacements(major_comm_size, size_t{0}); - std::exclusive_scan(rx_counts.begin(), rx_counts.end(), rx_displacements.begin(), size_t{0}); + std::exclusive_scan( + local_v_list_sizes.begin(), local_v_list_sizes.end(), rx_displacements.begin(), size_t{0}); key_offsets_or_rx_displacements = std::move(rx_displacements); } @@ -683,8 +693,8 @@ template void update_edge_minor_property(raft::handle_t const& handle, GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, + VertexIterator sorted_unique_vertex_first, + VertexIterator sorted_unique_vertex_last, VertexPropertyInputIterator vertex_property_input_first, EdgeMinorPropertyOutputWrapper edge_minor_property_output) { @@ -706,22 +716,49 @@ void update_edge_minor_property(raft::handle_t const& handle, auto const major_comm_rank = major_comm.get_rank(); auto const major_comm_size = major_comm.get_size(); - auto rx_counts = - host_scalar_allgather(major_comm, - static_cast(thrust::distance(vertex_first, vertex_last)), - handle.get_stream()); - auto max_rx_size = - 
std::reduce(rx_counts.begin(), rx_counts.end(), size_t{0}, [](auto lhs, auto rhs) { - return std::max(lhs, rhs); - }); - rmm::device_uvector rx_vertices(max_rx_size, handle.get_stream()); - auto rx_tmp_buffer = allocate_dataframe_buffer< - std::conditional_t>( - contains_packed_bool_element ? packed_bool_size(max_rx_size) : max_rx_size, - handle.get_stream()); - auto rx_value_first = get_dataframe_buffer_begin(rx_tmp_buffer); + auto v_list_size = + static_cast(thrust::distance(sorted_unique_vertex_first, sorted_unique_vertex_last)); + std::array v_list_range = {vertex_t{0}, vertex_t{0}}; + if (v_list_size > 0) { + rmm::device_uvector tmps(2, handle.get_stream()); + thrust::tabulate(handle.get_thrust_policy(), + tmps.begin(), + tmps.end(), + [sorted_unique_vertex_first, v_list_size] __device__(size_t i) { + return (i == 0) ? *sorted_unique_vertex_first + : (*(sorted_unique_vertex_first + (v_list_size - 1)) + 1); + }); + raft::update_host(v_list_range.data(), tmps.data(), 2, handle.get_stream()); + handle.sync_stream(); + } + + auto local_v_list_sizes = host_scalar_allgather(major_comm, v_list_size, handle.get_stream()); + auto local_v_list_range_firsts = + host_scalar_allgather(major_comm, v_list_range[0], handle.get_stream()); + auto local_v_list_range_lasts = + host_scalar_allgather(major_comm, v_list_range[1], handle.get_stream()); + + std::optional> v_list_bitmap{std::nullopt}; + if (major_comm_size > 1) { + double avg_fill_ratio{0.0}; + for (int i = 0; i < major_comm_size; ++i) { + auto num_keys = static_cast(local_v_list_sizes[i]); + auto range_size = local_v_list_range_lasts[i] - local_v_list_range_firsts[i]; + avg_fill_ratio += + (range_size > 0) ? 
(num_keys / static_cast(range_size)) : double{0.0}; + } + avg_fill_ratio /= static_cast(major_comm_size); + + constexpr double threshold_ratio = + 0.0 /* tuning parameter */ / static_cast(sizeof(vertex_t) * 8); + if (avg_fill_ratio > threshold_ratio) { + v_list_bitmap = compute_vertex_list_bitmap_info(sorted_unique_vertex_first, + sorted_unique_vertex_last, + local_v_list_range_firsts[major_comm_rank], + local_v_list_range_lasts[major_comm_rank], + handle.get_stream()); + } + } std::optional> key_offsets{}; if constexpr (GraphViewType::is_storage_transposed) { @@ -735,13 +772,23 @@ void update_edge_minor_property(raft::handle_t const& handle, graph_view.local_edge_partition_view(size_t{0})); auto edge_partition_keys = edge_minor_property_output.keys(); for (int i = 0; i < major_comm_size; ++i) { + rmm::device_uvector rx_vertices(local_v_list_sizes[i], handle.get_stream()); + auto rx_tmp_buffer = allocate_dataframe_buffer< + std::conditional_t>( + contains_packed_bool_element ? packed_bool_size(local_v_list_sizes[i]) + : local_v_list_sizes[i], + handle.get_stream()); + auto rx_value_first = get_dataframe_buffer_begin(rx_tmp_buffer); + if (i == major_comm_rank) { auto vertex_partition = vertex_partition_device_view_t( graph_view.local_vertex_partition_view()); if constexpr (contains_packed_bool_element) { auto bool_first = thrust::make_transform_iterator( - vertex_first, + sorted_unique_vertex_first, cuda::proclaim_return_type([vertex_property_input_first, vertex_partition] __device__(auto v) { auto v_offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); @@ -749,34 +796,53 @@ void update_edge_minor_property(raft::handle_t const& handle, *(vertex_property_input_first + packed_bool_offset(v_offset)) & packed_bool_mask(v_offset)); })); - pack_bools(handle, - bool_first, - bool_first + thrust::distance(vertex_first, vertex_last), - rx_value_first); + pack_bools( + handle, + bool_first, + bool_first + thrust::distance(sorted_unique_vertex_first, 
sorted_unique_vertex_last), + rx_value_first); } else { auto map_first = thrust::make_transform_iterator( - vertex_first, + sorted_unique_vertex_first, cuda::proclaim_return_type([vertex_partition] __device__(auto v) { return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); })); // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a // permutation iterator (and directly gathers to the internal buffer) - thrust::gather(handle.get_thrust_policy(), - map_first, - map_first + thrust::distance(vertex_first, vertex_last), - vertex_property_input_first, - rx_value_first); + thrust::gather( + handle.get_thrust_policy(), + map_first, + map_first + thrust::distance(sorted_unique_vertex_first, sorted_unique_vertex_last), + vertex_property_input_first, + rx_value_first); } } // FIXME: these broadcast operations can be placed between ncclGroupStart() and // ncclGroupEnd() - device_bcast( - major_comm, vertex_first, rx_vertices.begin(), rx_counts[i], i, handle.get_stream()); + std::variant, decltype(sorted_unique_vertex_first)> + v_list{}; + if (v_list_bitmap) { + v_list = + (i == major_comm_rank) + ? raft::device_span((*v_list_bitmap).data(), (*v_list_bitmap).size()) + : raft::device_span(static_cast(nullptr), size_t{0}); + } else { + v_list = sorted_unique_vertex_first; + } + device_bcast_vertex_list(major_comm, + v_list, + rx_vertices.begin(), + local_v_list_range_firsts[i], + local_v_list_range_lasts[i], + local_v_list_sizes[i], + i, + handle.get_stream()); device_bcast(major_comm, rx_value_first, rx_value_first, - contains_packed_bool_element ? packed_bool_size(rx_counts[i]) : rx_counts[i], + contains_packed_bool_element ? 
packed_bool_size(local_v_list_sizes[i]) + : local_v_list_sizes[i], i, handle.get_stream()); @@ -784,7 +850,7 @@ void update_edge_minor_property(raft::handle_t const& handle, thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(rx_counts[i]), + thrust::make_counting_iterator(local_v_list_sizes[i]), [rx_vertex_first = rx_vertices.begin(), rx_value_first, subrange_key_first = (*edge_partition_keys).begin() + (*key_offsets)[i], @@ -812,7 +878,7 @@ void update_edge_minor_property(raft::handle_t const& handle, thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(vertex_t{0}), - thrust::make_counting_iterator(static_cast(rx_counts[i])), + thrust::make_counting_iterator(static_cast(local_v_list_sizes[i])), [edge_partition, rx_vertex_first = rx_vertices.begin(), rx_value_first, @@ -833,7 +899,7 @@ void update_edge_minor_property(raft::handle_t const& handle, // directly scatters from the internal buffer) thrust::scatter(handle.get_thrust_policy(), rx_value_first, - rx_value_first + rx_counts[i], + rx_value_first + local_v_list_sizes[i], map_first, edge_partition_value_first); } @@ -844,20 +910,22 @@ void update_edge_minor_property(raft::handle_t const& handle, graph_view.local_edge_partition_src_range_size()); if constexpr (contains_packed_bool_element) { thrust::for_each(handle.get_thrust_policy(), - vertex_first, - vertex_last, + sorted_unique_vertex_first, + sorted_unique_vertex_last, [vertex_property_input_first, output_value_first = edge_partition_value_first] __device__(auto v) { bool val = static_cast(*(vertex_property_input_first + v)); packed_bool_atomic_set(output_value_first, v, val); }); } else { - auto val_first = thrust::make_permutation_iterator(vertex_property_input_first, vertex_first); - thrust::scatter(handle.get_thrust_policy(), - val_first, - val_first + thrust::distance(vertex_first, vertex_last), - vertex_first, - edge_partition_value_first); + auto val_first 
= + thrust::make_permutation_iterator(vertex_property_input_first, sorted_unique_vertex_first); + thrust::scatter( + handle.get_thrust_policy(), + val_first, + val_first + thrust::distance(sorted_unique_vertex_first, sorted_unique_vertex_last), + sorted_unique_vertex_first, + edge_partition_value_first); } } } @@ -909,8 +977,9 @@ void update_edge_src_property(raft::handle_t const& handle, /** * @brief Update graph edge source property values from the input vertex property values. * - * This version updates only a subset of graph edge source property values. [@p vertex_first, @p - * vertex_last) specifies the vertices with new property values to be updated. + * This version updates only a subset of graph edge source property values. [@p + * sorted_unique_vertex_first, @p sorted_unique_vertex_last) specifies the vertices with new + * property values to be updated. * * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam VertexIterator Type of the iterator for vertex identifiers. @@ -919,10 +988,12 @@ void update_edge_src_property(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. - * @param vertex_first Iterator pointing to the first (inclusive) vertex with a new value to be - * updated. v in [vertex_first, vertex_last) should be distinct (and should belong to the vertex - * partition assigned to this process in multi-GPU), otherwise undefined behavior. - * @param vertex_last Iterator pointing to the last (exclusive) vertex with a new value. + * @param sorted_unique_vertex_first Iterator pointing to the first (inclusive) vertex with a new + * value to be updated. 
v in [sorted_unique_vertex_first, sorted_unique_vertex_last) should be + * sorted & distinct (and should belong to the vertex partition assigned to this process in + * multi-GPU), otherwise undefined behavior. + * @param sorted_unique_vertex_last Iterator pointing to the last (exclusive) vertex with a new + * value. * @param vertex_property_input_first Iterator pointing to the vertex property value for the first * (inclusive) vertex (of the vertex partition assigned to this process in multi-GPU). * `vertex_property_input_last` (exclusive) is deduced as @p vertex_property_input_first + @p @@ -937,8 +1008,8 @@ template void update_edge_src_property(raft::handle_t const& handle, GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, + VertexIterator sorted_unique_vertex_first, + VertexIterator sorted_unique_vertex_last, VertexPropertyInputIterator vertex_property_input_first, EdgeSrcValueOutputWrapper edge_src_property_output, bool do_expensive_check = false) @@ -946,8 +1017,8 @@ void update_edge_src_property(raft::handle_t const& handle, if (do_expensive_check) { auto num_invalids = thrust::count_if( handle.get_thrust_policy(), - vertex_first, - vertex_last, + sorted_unique_vertex_first, + sorted_unique_vertex_last, [local_vertex_partition_range_first = graph_view.local_vertex_partition_range_first(), local_vertex_partition_range_last = graph_view.local_vertex_partition_range_last()] __device__(auto v) { @@ -958,23 +1029,23 @@ void update_edge_src_property(raft::handle_t const& handle, num_invalids = host_scalar_allreduce(comm, num_invalids, raft::comms::op_t::SUM, handle.get_stream()); } - CUGRAPH_EXPECTS( - num_invalids == 0, - "Invalid input argument: invalid or non-local vertices in [vertex_first, vertex_last)."); + CUGRAPH_EXPECTS(num_invalids == 0, + "Invalid input argument: invalid or non-local vertices in " + "[sorted_unique_vertex_first, sorted_unique_vertex_last)."); } if constexpr 
(GraphViewType::is_storage_transposed) { detail::update_edge_minor_property(handle, graph_view, - vertex_first, - vertex_last, + sorted_unique_vertex_first, + sorted_unique_vertex_last, vertex_property_input_first, edge_src_property_output); } else { detail::update_edge_major_property(handle, graph_view, - vertex_first, - vertex_last, + sorted_unique_vertex_first, + sorted_unique_vertex_last, vertex_property_input_first, edge_src_property_output); } @@ -1026,8 +1097,9 @@ void update_edge_dst_property(raft::handle_t const& handle, /** * @brief Update graph edge destination property values from the input vertex property values. * - * This version updates only a subset of graph edge destination property values. [@p vertex_first, - * @p vertex_last) specifies the vertices with new property values to be updated. + * This version updates only a subset of graph edge destination property values. [@p + * sorted_unique_vertex_first, @p sorted_unique_vertex_last) specifies the vertices with new + * property values to be updated. * * @tparam GraphViewType Type of the passed non-owning graph object. * @tparam VertexIterator Type of the iterator for vertex identifiers. @@ -1037,10 +1109,12 @@ void update_edge_dst_property(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Non-owning graph object. - * @param vertex_first Iterator pointing to the first (inclusive) vertex with a new value to be - * updated. v in [vertex_first, vertex_last) should be distinct (and should belong to the vertex - * partition assigned to this process in multi-GPU), otherwise undefined behavior. - * @param vertex_last Iterator pointing to the last (exclusive) vertex with a new value. + * @param sorted_unique_vertex_first Iterator pointing to the first (inclusive) vertex with a new + * value to be updated. 
v in [sorted_unique_vertex_first, sorted_unique_vertex_last) should be + * sorted & distinct (and should belong to the vertex partition assigned to this process in + * multi-GPU), otherwise undefined behavior. + * @param sorted_unique_vertex_last Iterator pointing to the last (exclusive) vertex with a new + * value. * @param vertex_property_input_first Iterator pointing to the vertex property value for the first * (inclusive) vertex (of the vertex partition assigned to this process in multi-GPU). * `vertex_property_input_last` (exclusive) is deduced as @p vertex_property_input_first + @p @@ -1055,8 +1129,8 @@ template void update_edge_dst_property(raft::handle_t const& handle, GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, + VertexIterator sorted_unique_vertex_first, + VertexIterator sorted_unique_vertex_last, VertexPropertyInputIterator vertex_property_input_first, EdgeDstValueOutputWrapper edge_dst_property_output, bool do_expensive_check = false) @@ -1064,8 +1138,8 @@ void update_edge_dst_property(raft::handle_t const& handle, if (do_expensive_check) { auto num_invalids = thrust::count_if( handle.get_thrust_policy(), - vertex_first, - vertex_last, + sorted_unique_vertex_first, + sorted_unique_vertex_last, [local_vertex_partition_range_first = graph_view.local_vertex_partition_range_first(), local_vertex_partition_range_last = graph_view.local_vertex_partition_range_last()] __device__(auto v) { @@ -1076,23 +1150,23 @@ void update_edge_dst_property(raft::handle_t const& handle, num_invalids = host_scalar_allreduce(comm, num_invalids, raft::comms::op_t::SUM, handle.get_stream()); } - CUGRAPH_EXPECTS( - num_invalids == 0, - "Invalid input argument: invalid or non-local vertices in [vertex_first, vertex_last)."); + CUGRAPH_EXPECTS(num_invalids == 0, + "Invalid input argument: invalid or non-local vertices in " + "[sorted_unique_vertex_first, sorted_unique_vertex_last)."); } if constexpr 
(GraphViewType::is_storage_transposed) { detail::update_edge_major_property(handle, graph_view, - vertex_first, - vertex_last, + sorted_unique_vertex_first, + sorted_unique_vertex_last, vertex_property_input_first, edge_dst_property_output); } else { detail::update_edge_minor_property(handle, graph_view, - vertex_first, - vertex_last, + sorted_unique_vertex_first, + sorted_unique_vertex_last, vertex_property_input_first, edge_dst_property_output); } diff --git a/cpp/src/prims/vertex_frontier.cuh b/cpp/src/prims/vertex_frontier.cuh index b13e6bfd458..6e7d8515beb 100644 --- a/cpp/src/prims/vertex_frontier.cuh +++ b/cpp/src/prims/vertex_frontier.cuh @@ -15,15 +15,24 @@ */ #pragma once +#include "prims/detail/multi_stream_utils.cuh" + +#include #include #include +#include +#include #include +#include #include #include #include +#include +#include +#include #include #include #include @@ -48,6 +57,191 @@ namespace cugraph { +template +KeyIterator compute_key_lower_bound(KeyIterator sorted_unique_key_first, + KeyIterator sorted_unique_key_last, + vertex_t v_threshold, + rmm::cuda_stream_view stream_view) +{ + using key_t = typename thrust::iterator_traits::value_type; + + if constexpr (std::is_same_v) { + return thrust::lower_bound( + rmm::exec_policy(stream_view), sorted_unique_key_first, sorted_unique_key_last, v_threshold); + } else { + key_t k_threshold{}; + thrust::get<0>(k_threshold) = v_threshold; + return thrust::lower_bound( + rmm::exec_policy(stream_view), + sorted_unique_key_first, + sorted_unique_key_last, + k_threshold, + [] __device__(auto lhs, auto rhs) { return thrust::get<0>(lhs) < thrust::get<0>(rhs); }); + } +} + +template +std::vector compute_key_segment_offsets(KeyIterator sorted_key_first, + KeyIterator sorted_key_last, + raft::host_span segment_offsets, + vertex_t vertex_range_first, + rmm::cuda_stream_view stream_view) +{ + using key_t = typename thrust::iterator_traits::value_type; + + std::vector h_thresholds(segment_offsets.size() - 2); + for 
(size_t i = 0; i < h_thresholds.size(); ++i) { + h_thresholds[i] = vertex_range_first + segment_offsets[i + 1]; + } + + rmm::device_uvector d_thresholds(h_thresholds.size(), stream_view); + raft::update_device(d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), stream_view); + + rmm::device_uvector d_offsets(d_thresholds.size(), stream_view); + if constexpr (std::is_same_v) { + thrust::lower_bound(rmm::exec_policy_nosync(stream_view), + sorted_key_first, + sorted_key_last, + d_thresholds.begin(), + d_thresholds.end(), + d_offsets.begin()); + } else { + auto sorted_vertex_first = + thrust::make_transform_iterator(sorted_key_first, thrust_tuple_get{}); + thrust::lower_bound(rmm::exec_policy_nosync(stream_view), + sorted_vertex_first, + sorted_vertex_first + thrust::distance(sorted_key_first, sorted_key_last), + d_thresholds.begin(), + d_thresholds.end(), + d_offsets.begin()); + } + + std::vector h_offsets(d_offsets.size() + 2); + raft::update_host(h_offsets.data() + 1, d_offsets.data(), d_offsets.size(), stream_view); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view)); + h_offsets[0] = size_t{0}; + h_offsets.back() = static_cast(thrust::distance(sorted_key_first, sorted_key_last)); + + return h_offsets; +} + +template +rmm::device_uvector compute_vertex_list_bitmap_info( + VertexIterator sorted_unique_vertex_first, + VertexIterator sorted_unique_vertex_last, + typename thrust::iterator_traits::value_type vertex_range_first, + typename thrust::iterator_traits::value_type vertex_range_last, + rmm::cuda_stream_view stream_view) +{ + using vertex_t = typename thrust::iterator_traits::value_type; + + auto bitmap = rmm::device_uvector( + packed_bool_size(vertex_range_last - vertex_range_first), stream_view); + rmm::device_uvector lasts(bitmap.size(), stream_view); + auto bdry_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(vertex_t{1}), + cuda::proclaim_return_type( + [vertex_range_first, + vertex_range_size = vertex_range_last - 
vertex_range_first] __device__(vertex_t i) { + return vertex_range_first + + static_cast( + std::min(packed_bools_per_word() * i, static_cast(vertex_range_size))); + })); + thrust::lower_bound(rmm::exec_policy_nosync(stream_view), + sorted_unique_vertex_first, + sorted_unique_vertex_last, + bdry_first, + bdry_first + bitmap.size(), + lasts.begin()); + thrust::tabulate( + rmm::exec_policy_nosync(stream_view), + bitmap.begin(), + bitmap.end(), + cuda::proclaim_return_type( + [sorted_unique_vertex_first, + vertex_range_first, + lasts = raft::device_span(lasts.data(), lasts.size())] __device__(size_t i) { + auto offset_first = (i != 0) ? lasts[i - 1] : vertex_t{0}; + auto offset_last = lasts[i]; + auto ret = packed_bool_empty_mask(); + for (auto j = offset_first; j < offset_last; ++j) { + auto v_offset = *(sorted_unique_vertex_first + j) - vertex_range_first; + ret |= packed_bool_mask(v_offset); + } + return ret; + })); + + return bitmap; +} + +template +void device_bcast_vertex_list( + raft::comms::comms_t const& comm, + std::variant, InputVertexIterator> v_list, + OutputVertexIterator output_v_first, + typename thrust::iterator_traits::value_type vertex_range_first, + typename thrust::iterator_traits::value_type vertex_range_last, + size_t v_list_size, + int root, + rmm::cuda_stream_view stream_view) +{ + using vertex_t = typename thrust::iterator_traits::value_type; + + static_assert( + std::is_same_v::value_type, vertex_t>); + + if (v_list.index() == 0) { // bitmap + rmm::device_uvector tmp_bitmap( + packed_bool_size(vertex_range_last - vertex_range_first), stream_view); + assert((comm.get_rank() != root) || (std::get<0>(v_list).size() == tmp_bitmap.size())); + device_bcast( + comm, std::get<0>(v_list).data(), tmp_bitmap.data(), tmp_bitmap.size(), root, stream_view); + rmm::device_scalar dummy(size_t{0}, stream_view); // we already know the count + detail::copy_if_nosync( + thrust::make_counting_iterator(vertex_range_first), + 
thrust::make_counting_iterator(vertex_range_last), + thrust::make_transform_iterator( + thrust::make_counting_iterator(vertex_t{0}), + cuda::proclaim_return_type( + [bitmap = raft::device_span( + tmp_bitmap.data(), tmp_bitmap.size())] __device__(vertex_t v_offset) { + return ((bitmap[packed_bool_offset(v_offset)] & packed_bool_mask(v_offset)) != + packed_bool_empty_mask()); + })), + output_v_first, + raft::device_span(dummy.data(), size_t{1}), + stream_view); + } else { + device_bcast(comm, std::get<1>(v_list), output_v_first, v_list_size, root, stream_view); + } +} + +template +void retrieve_vertex_list_from_bitmap( + raft::device_span bitmap, + OutputVertexIterator output_v_first, + raft::device_span count /* size = 1 */, + typename thrust::iterator_traits::value_type vertex_range_first, + typename thrust::iterator_traits::value_type vertex_range_last, + rmm::cuda_stream_view stream_view) +{ + using vertex_t = typename thrust::iterator_traits::value_type; + + assert((comm.get_rank() != root) || + (bitmap.size() >= packed_bool_size(vertex_range_last - vertex_ragne_first))); + detail::copy_if_nosync(thrust::make_counting_iterator(vertex_range_first), + thrust::make_counting_iterator(vertex_range_last), + thrust::make_transform_iterator( + thrust::make_counting_iterator(vertex_t{0}), + cuda::proclaim_return_type([bitmap] __device__(vertex_t v_offset) { + return ((bitmap[packed_bool_offset(v_offset)] & + packed_bool_mask(v_offset)) != packed_bool_empty_mask()); + })), + output_v_first, + count, + stream_view); +} + // key type is either vertex_t (tag_t == void) or thrust::tuple (tag_t != void) // if sorted_unique is true, stores unique key objects in the sorted (non-descending) order. // if false, there can be duplicates and the elements may not be sorted. @@ -328,20 +522,6 @@ class key_bucket_t { } } - auto const begin() const - { - if constexpr (std::is_same_v) { - return vertices_.index() == 0 ? 
std::get<0>(vertices_).begin() - : std::get<1>(vertices_).begin(); - } else { - return vertices_.index() == 0 - ? thrust::make_zip_iterator( - thrust::make_tuple(std::get<0>(vertices_).begin(), std::get<0>(tags_).begin())) - : thrust::make_zip_iterator( - thrust::make_tuple(std::get<1>(vertices_).begin(), std::get<1>(tags_).begin())); - } - } - auto begin() { CUGRAPH_EXPECTS( @@ -355,12 +535,22 @@ class key_bucket_t { } } - auto const end() const + auto const cbegin() const { - return begin() + - (vertices_.index() == 0 ? std::get<0>(vertices_).size() : std::get<1>(vertices_).size()); + if constexpr (std::is_same_v) { + return vertices_.index() == 0 ? std::get<0>(vertices_).begin() + : std::get<1>(vertices_).begin(); + } else { + return vertices_.index() == 0 + ? thrust::make_zip_iterator( + thrust::make_tuple(std::get<0>(vertices_).begin(), std::get<0>(tags_).begin())) + : thrust::make_zip_iterator( + thrust::make_tuple(std::get<1>(vertices_).begin(), std::get<1>(tags_).begin())); + } } + auto const begin() const { return cbegin(); } + auto end() { CUGRAPH_EXPECTS( @@ -369,15 +559,13 @@ class key_bucket_t { return begin() + std::get<0>(vertices_).size(); } - auto const vertex_begin() const + auto const cend() const { - return vertices_.index() == 0 ? std::get<0>(vertices_).begin() : std::get<1>(vertices_).begin(); + return begin() + + (vertices_.index() == 0 ? std::get<0>(vertices_).size() : std::get<1>(vertices_).size()); } - auto const vertex_end() const - { - return vertices_.index() == 0 ? std::get<0>(vertices_).end() : std::get<1>(vertices_).end(); - } + auto const end() const { return cend(); } auto vertex_begin() { @@ -387,6 +575,13 @@ class key_bucket_t { return std::get<0>(vertices_).begin(); } + auto const vertex_cbegin() const + { + return vertices_.index() == 0 ? 
std::get<0>(vertices_).begin() : std::get<1>(vertices_).begin(); + } + + auto const vertex_begin() const { return vertex_cbegin(); } + auto vertex_end() { CUGRAPH_EXPECTS( @@ -395,6 +590,13 @@ class key_bucket_t { return std::get<0>(vertices_).end(); } + auto const vertex_cend() const + { + return vertices_.index() == 0 ? std::get<0>(vertices_).end() : std::get<1>(vertices_).end(); + } + + auto const vertex_end() const { return vertex_cend(); } + bool is_owning() { return (vertices_.index() == 0); } private: diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index ccca71cdf20..ed77b330439 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -31,6 +31,8 @@ #include +#include + namespace cugraph { namespace detail { @@ -133,8 +135,14 @@ neighbor_sample_impl(raft::handle_t const& handle, } } + // Get the number of hop. If homogeneous neighbor sample, num_edge_types = 1. + auto num_hops = ((fan_out.size() % num_edge_types) == 0) + ? (fan_out.size() / num_edge_types) + : ((fan_out.size() / num_edge_types) + 1); + std::vector> level_result_src_vectors{}; std::vector> level_result_dst_vectors{}; + auto level_result_weight_vectors = edge_weight_view ? std::make_optional(std::vector>{}) : std::nullopt; @@ -147,14 +155,16 @@ neighbor_sample_impl(raft::handle_t const& handle, starting_vertex_labels ? 
std::make_optional(std::vector>{}) : std::nullopt; - level_result_src_vectors.reserve(fan_out.size()); - level_result_dst_vectors.reserve(fan_out.size()); - if (level_result_weight_vectors) { (*level_result_weight_vectors).reserve(fan_out.size()); } - if (level_result_edge_id_vectors) { (*level_result_edge_id_vectors).reserve(fan_out.size()); } - if (level_result_edge_type_vectors) { (*level_result_edge_type_vectors).reserve(fan_out.size()); } - if (level_result_label_vectors) { (*level_result_label_vectors).reserve(fan_out.size()); } + level_result_src_vectors.reserve(num_hops); + level_result_dst_vectors.reserve(num_hops); + + if (level_result_weight_vectors) { (*level_result_weight_vectors).reserve(num_hops); } + if (level_result_edge_id_vectors) { (*level_result_edge_id_vectors).reserve(num_hops); } + if (level_result_edge_type_vectors) { (*level_result_edge_type_vectors).reserve(num_hops); } + if (level_result_label_vectors) { (*level_result_label_vectors).reserve(num_hops); } rmm::device_uvector frontier_vertices(0, handle.get_stream()); + auto frontier_vertex_labels = starting_vertex_labels ? std::make_optional(rmm::device_uvector{0, handle.get_stream()}) @@ -174,12 +184,24 @@ neighbor_sample_impl(raft::handle_t const& handle, std::vector level_sizes{}; - // Get the number of hop. If homogeneous neighbor sample, num_edge_types = 1 - auto num_hops = ((fan_out.size() % num_edge_types) == 0) - ? (fan_out.size() / num_edge_types) - : ((fan_out.size() / num_edge_types) + 1); - for (auto hop = 0; hop < num_hops; hop++) { + rmm::device_uvector level_result_src(0, handle.get_stream()); + rmm::device_uvector level_result_dst(0, handle.get_stream()); + + auto level_result_weight = + edge_weight_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + auto level_result_edge_id = + edge_id_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + auto level_result_edge_type = + edge_type_view ? 
std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + auto level_result_label = + starting_vertex_labels + ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + for (auto edge_type_id = 0; edge_type_id < num_edge_types; edge_type_id++) { auto k_level = fan_out[(hop * num_edge_types) + edge_type_id]; rmm::device_uvector srcs(0, handle.get_stream()); @@ -194,49 +216,119 @@ neighbor_sample_impl(raft::handle_t const& handle, } if (k_level > 0) { - std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = - sample_edges(handle, - modified_graph_view, - edge_weight_view, - edge_id_view, - edge_type_view, - edge_bias_view, - rng_state, - starting_vertices, - starting_vertex_labels, - static_cast(k_level), - with_replacement); - } else { - std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = - gather_one_hop_edgelist(handle, - modified_graph_view, - edge_weight_view, - edge_id_view, - edge_type_view, - starting_vertices, - starting_vertex_labels); + std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = sample_edges( + handle, + modified_graph_view, + edge_weight_view, + edge_id_view, + edge_type_view, + edge_bias_view, + rng_state, + hop == 0 + ? starting_vertices + : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), + hop == 0 ? starting_vertex_labels + : starting_vertex_labels + ? std::make_optional(raft::device_span(frontier_vertex_labels->data(), + frontier_vertex_labels->size())) + : std::nullopt, + static_cast(k_level), + with_replacement); + } else if (k_level < 0) { + std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = gather_one_hop_edgelist( + handle, + modified_graph_view, + edge_weight_view, + edge_id_view, + edge_type_view, + hop == 0 + ? starting_vertices + : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), + hop == 0 ? starting_vertex_labels + : starting_vertex_labels + ? 
std::make_optional(raft::device_span(frontier_vertex_labels->data(), + frontier_vertex_labels->size())) + : std::nullopt); + } + + auto old_size = level_result_src.size(); + level_result_src.resize(old_size + srcs.size(), handle.get_stream()); + level_result_dst.resize(old_size + srcs.size(), handle.get_stream()); + + raft::copy( + level_result_src.begin() + old_size, srcs.begin(), srcs.size(), handle.get_stream()); + + raft::copy( + level_result_dst.begin() + old_size, dsts.begin(), srcs.size(), handle.get_stream()); + + if (weights) { + (*level_result_weight).resize(old_size + srcs.size(), handle.get_stream()); + + raft::copy(level_result_weight->begin() + old_size, + weights->begin(), + srcs.size(), + handle.get_stream()); } - level_sizes.push_back(srcs.size()); - level_result_src_vectors.push_back(std::move(srcs)); - level_result_dst_vectors.push_back(std::move(dsts)); + if (edge_ids) { + (*level_result_edge_id).resize(old_size + srcs.size(), handle.get_stream()); + raft::copy(level_result_edge_id->begin() + old_size, + edge_ids->begin(), + srcs.size(), + handle.get_stream()); + } + if (edge_types) { + (*level_result_edge_type).resize(old_size + srcs.size(), handle.get_stream()); - if (weights) { (*level_result_weight_vectors).push_back(std::move(*weights)); } - if (edge_ids) { (*level_result_edge_id_vectors).push_back(std::move(*edge_ids)); } - if (edge_types) { (*level_result_edge_type_vectors).push_back(std::move(*edge_types)); } - if (labels) { (*level_result_label_vectors).push_back(std::move(*labels)); } + raft::copy(level_result_edge_type->begin() + old_size, + edge_types->begin(), + srcs.size(), + handle.get_stream()); + } + + if (labels) { + (*level_result_label).resize(old_size + srcs.size(), handle.get_stream()); + + raft::copy(level_result_label->begin() + old_size, + labels->begin(), + srcs.size(), + handle.get_stream()); + } if (num_edge_types > 1) { modified_graph_view.clear_edge_mask(); } } + level_sizes.push_back(level_result_src.size()); + 
level_result_src_vectors.push_back(std::move(level_result_src)); + level_result_dst_vectors.push_back(std::move(level_result_dst)); + + if (level_result_weight) { + (*level_result_weight_vectors).push_back(std::move(*level_result_weight)); + } + if (level_result_edge_id) { + (*level_result_edge_id_vectors).push_back(std::move(*level_result_edge_id)); + } + if (level_result_edge_type) { + (*level_result_edge_type_vectors).push_back(std::move(*level_result_edge_type)); + } + if (level_result_label) { + (*level_result_label_vectors).push_back(std::move(*level_result_label)); + } + // FIXME: We should modify vertex_partition_range_lasts to return a raft::host_span // rather than making a copy. auto vertex_partition_range_lasts = modified_graph_view.vertex_partition_range_lasts(); std::tie(frontier_vertices, frontier_vertex_labels, vertex_used_as_source) = prepare_next_frontier( handle, - starting_vertices, - starting_vertex_labels, + hop == 0 + ? starting_vertices + : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), + hop == 0 ? starting_vertex_labels + : starting_vertex_labels + ? 
std::make_optional(raft::device_span(frontier_vertex_labels->data(), + frontier_vertex_labels->size())) + : std::nullopt, raft::device_span{level_result_dst_vectors.back().data(), level_result_dst_vectors.back().size()}, frontier_vertex_labels @@ -249,14 +341,6 @@ neighbor_sample_impl(raft::handle_t const& handle, prior_sources_behavior, dedupe_sources, do_expensive_check); - - starting_vertices = - raft::device_span(frontier_vertices.data(), frontier_vertices.size()); - - if (frontier_vertex_labels) { - starting_vertex_labels = raft::device_span(frontier_vertex_labels->data(), - frontier_vertex_labels->size()); - } } auto result_size = std::reduce(level_sizes.begin(), level_sizes.end()); @@ -339,7 +423,7 @@ neighbor_sample_impl(raft::handle_t const& handle, if (return_hops) { result_hops = rmm::device_uvector(result_size, handle.get_stream()); output_offset = 0; - for (size_t i = 0; i < fan_out.size(); ++i) { + for (size_t i = 0; i < num_hops; ++i) { scalar_fill( handle, result_hops->data() + output_offset, level_sizes[i], static_cast(i)); output_offset += level_sizes[i]; diff --git a/cpp/src/structure/create_graph_from_edgelist_impl.cuh b/cpp/src/structure/create_graph_from_edgelist_impl.cuh index 9796ddd60a1..cd98db31654 100644 --- a/cpp/src/structure/create_graph_from_edgelist_impl.cuh +++ b/cpp/src/structure/create_graph_from_edgelist_impl.cuh @@ -44,6 +44,7 @@ #include #include +#include namespace cugraph { @@ -299,6 +300,121 @@ bool check_no_parallel_edge(raft::handle_t const& handle, (org_edge_first + edgelist_srcs.size()); } +template +std::vector> +split_edge_chunk_compressed_elements_to_local_edge_partitions( + raft::handle_t const& handle, + std::vector>&& edgelist_compressed_elements, + std::vector> const& edgelist_edge_offset_vectors, + std::vector const& edge_partition_edge_counts, + std::vector> const& edge_partition_intra_partition_segment_offset_vectors, + std::vector> const& + edge_partition_intra_segment_copy_output_displacement_vectors, + 
size_t element_size) +{ + auto num_chunks = edgelist_compressed_elements.size(); + auto num_edge_partitions = edge_partition_edge_counts.size(); + auto num_segments = edge_partition_intra_partition_segment_offset_vectors[0].size() - 1; + for (size_t i = 0; i < edge_partition_intra_partition_segment_offset_vectors.size(); ++i) { + assert(edge_partition_intra_partition_segment_offset_vectors[i].size() == (num_segments + 1)); + } + + std::vector> edge_partition_compressed_elements{}; + edge_partition_compressed_elements.reserve(num_edge_partitions); + for (size_t i = 0; i < num_edge_partitions; ++i) { + edge_partition_compressed_elements.push_back(rmm::device_uvector( + edge_partition_edge_counts[i] * element_size, handle.get_stream())); + } + + for (size_t i = 0; i < num_edge_partitions; ++i) { + for (size_t j = 0; j < num_segments; ++j) { + for (size_t k = 0; k < num_chunks; ++k) { + auto segment_offset = edgelist_edge_offset_vectors[k][i * num_segments + j]; + auto segment_size = edgelist_edge_offset_vectors[k][i * num_segments + j + 1] - + edgelist_edge_offset_vectors[k][i * num_segments + j]; + auto output_offset = + edge_partition_intra_partition_segment_offset_vectors[i][j] + + edge_partition_intra_segment_copy_output_displacement_vectors[i][j * num_chunks + k]; + thrust::copy( + handle.get_thrust_policy(), + edgelist_compressed_elements[k].begin() + segment_offset * element_size, + edgelist_compressed_elements[k].begin() + (segment_offset + segment_size) * element_size, + edge_partition_compressed_elements[i].begin() + output_offset * element_size); + } + } + } + edgelist_compressed_elements.clear(); + + return edge_partition_compressed_elements; +} + +template +std::vector> split_edge_chunk_elements_to_local_edge_partitions( + raft::handle_t const& handle, + std::vector>&& edgelist_elements, + std::vector> const& edgelist_edge_offset_vectors, + std::vector const& edge_partition_edge_counts, + std::vector> const& 
edge_partition_intra_partition_segment_offset_vectors, + std::vector> const& + edge_partition_intra_segment_copy_output_displacement_vectors) +{ + static_assert(std::is_arithmetic_v); // otherwise, unimplemented. + auto num_chunks = edgelist_elements.size(); + auto num_edge_partitions = edge_partition_edge_counts.size(); + auto num_segments = edge_partition_intra_partition_segment_offset_vectors[0].size() - 1; + for (size_t i = 0; i < edge_partition_intra_partition_segment_offset_vectors.size(); ++i) { + assert(edge_partition_intra_partition_segment_offset_vectors[i].size() == (num_segments + 1)); + } + + std::vector> edge_partition_elements{}; + edge_partition_elements.reserve(num_edge_partitions); + for (size_t i = 0; i < num_edge_partitions; ++i) { + edge_partition_elements.push_back( + rmm::device_uvector(edge_partition_edge_counts[i], handle.get_stream())); + } + + for (size_t i = 0; i < num_edge_partitions; ++i) { + for (size_t j = 0; j < num_segments; ++j) { + for (size_t k = 0; k < num_chunks; ++k) { + auto segment_offset = edgelist_edge_offset_vectors[k][i * num_segments + j]; + auto segment_size = edgelist_edge_offset_vectors[k][i * num_segments + j + 1] - + edgelist_edge_offset_vectors[k][i * num_segments + j]; + auto output_offset = + edge_partition_intra_partition_segment_offset_vectors[i][j] + + edge_partition_intra_segment_copy_output_displacement_vectors[i][j * num_chunks + k]; + thrust::copy(handle.get_thrust_policy(), + edgelist_elements[k].begin() + segment_offset, + edgelist_elements[k].begin() + (segment_offset + segment_size), + edge_partition_elements[i].begin() + output_offset); + } + } + } + edgelist_elements.clear(); + + return edge_partition_elements; +} + +template +void decompress_vertices(raft::handle_t const& handle, + raft::device_span compressed_vertices, + raft::device_span vertices, + size_t compressed_v_size) +{ + auto input_v_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + 
cuda::proclaim_return_type( + [byte_first = compressed_vertices.begin(), compressed_v_size] __device__(size_t i) { + uint64_t v{0}; + for (size_t j = 0; j < compressed_v_size; ++j) { + auto b = *(byte_first + i * compressed_v_size + j); + v |= static_cast(b) << (8 * j); + } + return static_cast(v); + })); + thrust::copy( + handle.get_thrust_policy(), input_v_first, input_v_first + vertices.size(), vertices.begin()); +} + template >>&& edge_partition_edgelist_weights, std::optional>>&& edge_partition_edgelist_edge_ids, std::optional>>&& edge_partition_edgelist_edge_types, - std::vector> const& edgelist_intra_partition_segment_offsets, + std::vector> const& edgelist_intra_partition_segment_offset_vectors, graph_properties_t graph_properties, bool renumber) { @@ -347,14 +463,14 @@ create_graph_from_partitioned_edgelist( src_ptrs[i] = edge_partition_edgelist_srcs[i].begin(); dst_ptrs[i] = edge_partition_edgelist_dsts[i].begin(); } - auto [renumber_map_labels, meta] = - cugraph::renumber_edgelist(handle, - std::move(local_vertices), - src_ptrs, - dst_ptrs, - edgelist_edge_counts, - edgelist_intra_partition_segment_offsets, - store_transposed); + auto [renumber_map_labels, meta] = cugraph::renumber_edgelist( + handle, + std::move(local_vertices), + src_ptrs, + dst_ptrs, + edgelist_edge_counts, + edgelist_intra_partition_segment_offset_vectors, + store_transposed); auto num_segments_per_vertex_partition = static_cast(meta.edge_partition_segment_offsets.size() / minor_comm_size); @@ -369,7 +485,7 @@ create_graph_from_partitioned_edgelist( if (edge_partition_edgelist_edge_ids) { element_size += sizeof(edge_id_t); } if (edge_partition_edgelist_edge_types) { element_size += sizeof(edge_type_t); } auto constexpr mem_frugal_ratio = - 0.25; // if the expected temporary buffer size exceeds the mem_frugal_ratio of the + 0.05; // if the expected temporary buffer size exceeds the mem_frugal_ratio of the // total_global_mem, switch to the memory frugal approach auto 
mem_frugal_threshold = static_cast(static_cast(total_global_mem / element_size) * mem_frugal_ratio); @@ -684,11 +800,13 @@ create_graph_from_partitioned_edgelist( std::move(edge_partition_offsets), std::move(edge_partition_indices), std::move(edge_partition_dcs_nzd_vertices), - cugraph::graph_meta_t{meta.number_of_vertices, - meta.number_of_edges, - graph_properties, - meta.partition, - meta.edge_partition_segment_offsets}), + cugraph::graph_meta_t{ + meta.number_of_vertices, + meta.number_of_edges, + graph_properties, + meta.partition, + meta.edge_partition_segment_offsets, + meta.edge_partition_hypersparse_degree_offsets}), std::move(edge_weights), std::move(edge_ids), std::move(edge_types), @@ -790,7 +908,7 @@ create_graph_from_edgelist_impl( handle.sync_stream(); std::vector edgelist_edge_counts(minor_comm_size, edge_t{0}); - auto edgelist_intra_partition_segment_offsets = std::vector>( + auto edgelist_intra_partition_segment_offset_vectors = std::vector>( minor_comm_size, std::vector(major_comm_size + 1, edge_t{0})); for (int i = 0; i < minor_comm_size; ++i) { edgelist_edge_counts[i] = std::accumulate(h_edge_counts.begin() + major_comm_size * i, @@ -798,7 +916,7 @@ create_graph_from_edgelist_impl( edge_t{0}); std::partial_sum(h_edge_counts.begin() + major_comm_size * i, h_edge_counts.begin() + major_comm_size * (i + 1), - edgelist_intra_partition_segment_offsets[i].begin() + 1); + edgelist_intra_partition_segment_offset_vectors[i].begin() + 1); } std::vector edgelist_displacements(minor_comm_size, edge_t{0}); std::partial_sum(edgelist_edge_counts.begin(), @@ -898,7 +1016,7 @@ create_graph_from_edgelist_impl( std::move(edge_partition_edgelist_weights), std::move(edge_partition_edgelist_edge_ids), std::move(edge_partition_edgelist_edge_types), - edgelist_intra_partition_segment_offsets, + edgelist_intra_partition_segment_offset_vectors, graph_properties, renumber); } @@ -1021,30 +1139,66 @@ create_graph_from_edgelist_impl( } } - // 1. 
groupby each edge chunks to their target local adjacency matrix partition (and further + auto num_chunks = edgelist_srcs.size(); + + // 1. set whether to temporarily compress vertex IDs or not in splitting edge chunks + + size_t compressed_v_size = + sizeof(vertex_t); // if set to a value smaller than sizeof(vertex_t), temporarily store vertex + // IDs in compressed_v_size byte variables + + static_assert((sizeof(vertex_t) == 4) || (sizeof(vertex_t) == 8)); + if constexpr (sizeof(vertex_t) == 8) { // 64 bit vertex ID + static_assert(std::is_signed_v); // __clzll takes a signed integer + + auto total_global_mem = handle.get_device_properties().totalGlobalMem; + size_t element_size = sizeof(vertex_t) * 2; + if (edgelist_weights) { element_size += sizeof(weight_t); } + if (edgelist_edge_ids) { element_size += sizeof(edge_id_t); } + if (edgelist_edge_types) { element_size += sizeof(edge_type_t); } + edge_t num_edges{0}; + for (size_t i = 0; i < edgelist_srcs.size(); ++i) { + num_edges += edgelist_srcs[i].size(); + } + bool compress{false}; + if (static_cast(num_edges) * element_size > + static_cast(total_global_mem * 0.5 /* tuning parameter */)) { + compress = true; + } + + if (compress) { + size_t min_clz{sizeof(vertex_t) * 8}; + for (size_t i = 0; i < num_chunks; ++i) { + min_clz = + thrust::transform_reduce(handle.get_thrust_policy(), + edgelist_srcs[i].begin(), + edgelist_srcs[i].end(), + cuda::proclaim_return_type([] __device__(auto v) { + return static_cast(__clzll(v)); + }), + min_clz, + thrust::minimum{}); + min_clz = + thrust::transform_reduce(handle.get_thrust_policy(), + edgelist_dsts[i].begin(), + edgelist_dsts[i].end(), + cuda::proclaim_return_type([] __device__(auto v) { + return static_cast(__clzll(v)); + }), + min_clz, + thrust::minimum{}); + } + compressed_v_size = sizeof(vertex_t) - (min_clz / 8); + compressed_v_size = std::max(compressed_v_size, size_t{1}); + } + } + + // 2. 
groupby each edge chunks to their target local adjacency matrix partition (and further // groupby within the local partition by applying the compute_gpu_id_from_vertex_t to minor vertex // IDs). - std::vector>> edgelist_partitioned_srcs( - edgelist_srcs.size()); - std::vector>> edgelist_partitioned_dsts( - edgelist_srcs.size()); - auto edgelist_partitioned_weights = - edgelist_weights ? std::make_optional>>>( - edgelist_srcs.size()) - : std::nullopt; - auto edgelist_partitioned_edge_ids = - edgelist_edge_ids - ? std::make_optional>>>( - edgelist_srcs.size()) - : std::nullopt; - auto edgelist_partitioned_edge_types = - edgelist_edge_types - ? std::make_optional>>>( - edgelist_srcs.size()) - : std::nullopt; - - for (size_t i = 0; i < edgelist_srcs.size(); ++i) { // iterate over input edge chunks + std::vector> edgelist_edge_offset_vectors(num_chunks); + for (size_t i = 0; i < num_chunks; ++i) { // iterate over input edge chunks std::optional> this_chunk_weights{std::nullopt}; if (edgelist_weights) { this_chunk_weights = std::move((*edgelist_weights)[i]); } std::optional> this_chunk_edge_ids{std::nullopt}; @@ -1060,6 +1214,9 @@ create_graph_from_edgelist_impl( this_chunk_edge_ids, this_chunk_edge_types, true); + if (this_chunk_weights) { (*edgelist_weights)[i] = std::move(*this_chunk_weights); } + if (this_chunk_edge_ids) { (*edgelist_edge_ids)[i] = std::move(*this_chunk_edge_ids); } + if (this_chunk_edge_types) { (*edgelist_edge_types)[i] = std::move(*this_chunk_edge_types); } std::vector h_this_chunk_edge_counts(d_this_chunk_edge_counts.size()); raft::update_host(h_this_chunk_edge_counts.data(), @@ -1067,132 +1224,84 @@ create_graph_from_edgelist_impl( d_this_chunk_edge_counts.size(), handle.get_stream()); handle.sync_stream(); - std::vector h_this_chunk_edge_displacements(h_this_chunk_edge_counts.size()); - std::exclusive_scan(h_this_chunk_edge_counts.begin(), + std::vector h_this_chunk_edge_offsets( + h_this_chunk_edge_counts.size() + 1, + 0); // size = 
minor_comm_size (# local edge partitions) * major_comm_size (# segments in the + // local minor range) + std::inclusive_scan(h_this_chunk_edge_counts.begin(), h_this_chunk_edge_counts.end(), - h_this_chunk_edge_displacements.begin(), - size_t{0}); - - for (int j = 0; j < minor_comm_size /* # local edge partitions */ * - major_comm_size /* # segments in the local minor range */; - ++j) { - rmm::device_uvector tmp_srcs(h_this_chunk_edge_counts[j], handle.get_stream()); - auto input_first = edgelist_srcs[i].begin() + h_this_chunk_edge_displacements[j]; - thrust::copy( - handle.get_thrust_policy(), input_first, input_first + tmp_srcs.size(), tmp_srcs.begin()); - edgelist_partitioned_srcs[i].push_back(std::move(tmp_srcs)); - } - edgelist_srcs[i].resize(0, handle.get_stream()); - edgelist_srcs[i].shrink_to_fit(handle.get_stream()); - - for (int j = 0; j < minor_comm_size /* # local edge partitions */ * - major_comm_size /* # segments in the local minor range */; - ++j) { - rmm::device_uvector tmp_dsts(h_this_chunk_edge_counts[j], handle.get_stream()); - auto input_first = edgelist_dsts[i].begin() + h_this_chunk_edge_displacements[j]; - thrust::copy( - handle.get_thrust_policy(), input_first, input_first + tmp_dsts.size(), tmp_dsts.begin()); - edgelist_partitioned_dsts[i].push_back(std::move(tmp_dsts)); - } - edgelist_dsts[i].resize(0, handle.get_stream()); - edgelist_dsts[i].shrink_to_fit(handle.get_stream()); - - if (this_chunk_weights) { - for (int j = 0; j < minor_comm_size /* # local edge partitions */ * - major_comm_size /* # segments in the local minor range */; - ++j) { - rmm::device_uvector tmp_weights(h_this_chunk_edge_counts[j], handle.get_stream()); - auto input_first = (*this_chunk_weights).begin() + h_this_chunk_edge_displacements[j]; - thrust::copy(handle.get_thrust_policy(), - input_first, - input_first + tmp_weights.size(), - tmp_weights.begin()); - (*edgelist_partitioned_weights)[i].push_back(std::move(tmp_weights)); - } - (*this_chunk_weights).resize(0, 
handle.get_stream()); - (*this_chunk_weights).shrink_to_fit(handle.get_stream()); - } - - if (this_chunk_edge_ids) { - for (int j = 0; j < minor_comm_size /* # local edge partitions */ * - major_comm_size /* # segments in the local minor range */; - ++j) { - rmm::device_uvector tmp_edge_ids(h_this_chunk_edge_counts[j], - handle.get_stream()); - auto input_first = (*this_chunk_edge_ids).begin() + h_this_chunk_edge_displacements[j]; - thrust::copy(handle.get_thrust_policy(), - input_first, - input_first + tmp_edge_ids.size(), - tmp_edge_ids.begin()); - (*edgelist_partitioned_edge_ids)[i].push_back(std::move(tmp_edge_ids)); - } - (*this_chunk_edge_ids).resize(0, handle.get_stream()); - (*this_chunk_edge_ids).shrink_to_fit(handle.get_stream()); - } + h_this_chunk_edge_offsets.begin() + 1); + edgelist_edge_offset_vectors[i] = std::move(h_this_chunk_edge_offsets); + } - if (this_chunk_edge_types) { - for (int j = 0; j < minor_comm_size /* # local edge partitions */ * - major_comm_size /* # segments in the local minor range */; - ++j) { - rmm::device_uvector tmp_edge_types(h_this_chunk_edge_counts[j], - handle.get_stream()); - auto input_first = (*this_chunk_edge_types).begin() + h_this_chunk_edge_displacements[j]; - thrust::copy(handle.get_thrust_policy(), - input_first, - input_first + tmp_edge_types.size(), - tmp_edge_types.begin()); - (*edgelist_partitioned_edge_types)[i].push_back(std::move(tmp_edge_types)); - } - (*this_chunk_edge_types).resize(0, handle.get_stream()); - (*this_chunk_edge_types).shrink_to_fit(handle.get_stream()); + // 3. 
compress edge chunk source/destination vertices to cut intermediate peak memory requirement + + std::optional>> edgelist_compressed_srcs{std::nullopt}; + std::optional>> edgelist_compressed_dsts{std::nullopt}; + if (compressed_v_size < sizeof(vertex_t)) { + edgelist_compressed_srcs = std::vector>{}; + edgelist_compressed_dsts = std::vector>{}; + (*edgelist_compressed_srcs).reserve(num_chunks); + (*edgelist_compressed_dsts).reserve(num_chunks); + for (size_t i = 0; i < num_chunks; ++i) { // iterate over input edge chunks + // compress source values + auto tmp_srcs = rmm::device_uvector(edgelist_srcs[i].size() * compressed_v_size, + handle.get_stream()); + auto input_src_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type( + [src_first = edgelist_srcs[i].begin(), compressed_v_size] __device__(size_t i) { + auto v = static_cast(*(src_first + (i / compressed_v_size))); + return static_cast((v >> (8 * (i % compressed_v_size))) & uint64_t{0xff}); + })); + thrust::copy(handle.get_thrust_policy(), + input_src_first, + input_src_first + edgelist_srcs[i].size() * compressed_v_size, + tmp_srcs.begin()); + edgelist_srcs[i].resize(0, handle.get_stream()); + edgelist_srcs[i].shrink_to_fit(handle.get_stream()); + (*edgelist_compressed_srcs).push_back(std::move(tmp_srcs)); + + // compress destination values + + auto tmp_dsts = rmm::device_uvector(edgelist_dsts[i].size() * compressed_v_size, + handle.get_stream()); + auto input_dst_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type( + [dst_first = edgelist_dsts[i].begin(), compressed_v_size] __device__(size_t i) { + auto v = static_cast(*(dst_first + (i / compressed_v_size))); + return static_cast((v >> (8 * (i % compressed_v_size))) & uint64_t{0xff}); + })); + thrust::copy(handle.get_thrust_policy(), + input_dst_first, + input_dst_first + edgelist_dsts[i].size() * compressed_v_size, + tmp_dsts.begin()); + 
edgelist_dsts[i].resize(0, handle.get_stream()); + edgelist_dsts[i].shrink_to_fit(handle.get_stream()); + (*edgelist_compressed_dsts).push_back(std::move(tmp_dsts)); } } - edgelist_srcs.clear(); - edgelist_dsts.clear(); - if (edgelist_weights) { (*edgelist_weights).clear(); } - if (edgelist_edge_ids) { (*edgelist_edge_ids).clear(); } - if (edgelist_edge_types) { (*edgelist_edge_types).clear(); } - // 2. split the grouped edge chunks to local partitions + // 4. compute additional copy_offset vectors - auto edgelist_intra_partition_segment_offsets = std::vector>(minor_comm_size); - - std::vector> edge_partition_edgelist_srcs{}; - edge_partition_edgelist_srcs.reserve(minor_comm_size); - std::vector> edge_partition_edgelist_dsts{}; - edge_partition_edgelist_dsts.reserve(minor_comm_size); - auto edge_partition_edgelist_weights = - edgelist_partitioned_weights ? std::make_optional>>() - : std::nullopt; - if (edgelist_partitioned_weights) { (*edge_partition_edgelist_weights).reserve(minor_comm_size); } - auto edge_partition_edgelist_edge_ids = - edgelist_partitioned_edge_ids - ? std::make_optional>>() - : std::nullopt; - if (edgelist_partitioned_edge_ids) { - (*edge_partition_edgelist_edge_ids).reserve(minor_comm_size); - } - auto edge_partition_edgelist_edge_types = - edgelist_partitioned_edge_types - ? 
std::make_optional>>() - : std::nullopt; - if (edgelist_partitioned_edge_types) { - (*edge_partition_edgelist_edge_types).reserve(minor_comm_size); - } - - for (int i = 0; i < minor_comm_size; ++i) { // iterate over local edge partitions + std::vector edge_partition_edge_counts(minor_comm_size); + std::vector> edge_partition_intra_partition_segment_offset_vectors( + minor_comm_size); + std::vector> edge_partition_intra_segment_copy_output_displacement_vectors( + minor_comm_size); + for (int i = 0; i < minor_comm_size; ++i) { edge_t edge_count{0}; std::vector intra_partition_segment_sizes(major_comm_size, 0); - std::vector intra_segment_copy_output_displacements(major_comm_size * - edgelist_partitioned_srcs.size()); + std::vector intra_segment_copy_output_displacements(major_comm_size * num_chunks); for (int j = 0; j < major_comm_size /* # segments in the local minor range */; ++j) { edge_t displacement{0}; - for (size_t k = 0; k < edgelist_partitioned_srcs.size() /* # input edge chunks */; ++k) { - auto segment_size = edgelist_partitioned_srcs[k][i * major_comm_size + j].size(); + for (size_t k = 0; k < num_chunks; ++k) { + auto segment_size = edgelist_edge_offset_vectors[k][i * major_comm_size + j + 1] - + edgelist_edge_offset_vectors[k][i * major_comm_size + j]; edge_count += segment_size; intra_partition_segment_sizes[j] += segment_size; - intra_segment_copy_output_displacements[j * edgelist_partitioned_srcs.size() + k] = - displacement; + intra_segment_copy_output_displacements[j * num_chunks + k] = displacement; displacement += segment_size; } } @@ -1201,93 +1310,133 @@ create_graph_from_edgelist_impl( intra_partition_segment_sizes.end(), intra_partition_segment_offsets.begin() + 1); - rmm::device_uvector tmp_srcs(edge_count, handle.get_stream()); - for (int j = 0; j < major_comm_size; ++j) { - for (size_t k = 0; k < edgelist_partitioned_srcs.size(); ++k) { - auto& input_buffer = edgelist_partitioned_srcs[k][i * major_comm_size + j]; - thrust::copy( - 
handle.get_thrust_policy(), - input_buffer.begin(), - input_buffer.end(), - tmp_srcs.begin() + intra_partition_segment_offsets[j] + - intra_segment_copy_output_displacements[j * edgelist_partitioned_srcs.size() + k]); - input_buffer.resize(0, handle.get_stream()); - input_buffer.shrink_to_fit(handle.get_stream()); - } - } - edge_partition_edgelist_srcs.push_back(std::move(tmp_srcs)); + edge_partition_edge_counts[i] = edge_count; + edge_partition_intra_partition_segment_offset_vectors[i] = + std::move(intra_partition_segment_offsets); + edge_partition_intra_segment_copy_output_displacement_vectors[i] = + std::move(intra_segment_copy_output_displacements); + } - rmm::device_uvector tmp_dsts(edge_count, handle.get_stream()); - for (int j = 0; j < major_comm_size; ++j) { - for (size_t k = 0; k < edgelist_partitioned_dsts.size(); ++k) { - auto& input_buffer = edgelist_partitioned_dsts[k][i * major_comm_size + j]; - thrust::copy( - handle.get_thrust_policy(), - input_buffer.begin(), - input_buffer.end(), - tmp_dsts.begin() + intra_partition_segment_offsets[j] + - intra_segment_copy_output_displacements[j * edgelist_partitioned_dsts.size() + k]); - input_buffer.resize(0, handle.get_stream()); - input_buffer.shrink_to_fit(handle.get_stream()); - } - } - edge_partition_edgelist_dsts.push_back(std::move(tmp_dsts)); + // 5. 
split the grouped edge chunks to local partitions - if (edge_partition_edgelist_weights) { - rmm::device_uvector tmp_weights(edge_count, handle.get_stream()); - for (int j = 0; j < major_comm_size; ++j) { - for (size_t k = 0; k < edgelist_partitioned_dsts.size(); ++k) { - auto& input_buffer = (*edgelist_partitioned_weights)[k][i * major_comm_size + j]; - thrust::copy( - handle.get_thrust_policy(), - input_buffer.begin(), - input_buffer.end(), - tmp_weights.begin() + intra_partition_segment_offsets[j] + - intra_segment_copy_output_displacements[j * edgelist_partitioned_dsts.size() + k]); - input_buffer.resize(0, handle.get_stream()); - input_buffer.shrink_to_fit(handle.get_stream()); - } - } - (*edge_partition_edgelist_weights).push_back(std::move(tmp_weights)); - } + std::vector> edge_partition_edgelist_srcs{}; + std::vector> edge_partition_edgelist_dsts{}; + std::optional>> edge_partition_edgelist_weights{ + std::nullopt}; + std::optional>> edge_partition_edgelist_edge_ids{ + std::nullopt}; + std::optional>> edge_partition_edgelist_edge_types{ + std::nullopt}; - if (edge_partition_edgelist_edge_ids) { - rmm::device_uvector tmp_edge_ids(edge_count, handle.get_stream()); - for (int j = 0; j < major_comm_size; ++j) { - for (size_t k = 0; k < edgelist_partitioned_dsts.size(); ++k) { - auto& input_buffer = (*edgelist_partitioned_edge_ids)[k][i * major_comm_size + j]; - thrust::copy( - handle.get_thrust_policy(), - input_buffer.begin(), - input_buffer.end(), - tmp_edge_ids.begin() + intra_partition_segment_offsets[j] + - intra_segment_copy_output_displacements[j * edgelist_partitioned_dsts.size() + k]); - input_buffer.resize(0, handle.get_stream()); - input_buffer.shrink_to_fit(handle.get_stream()); - } - } - (*edge_partition_edgelist_edge_ids).push_back(std::move(tmp_edge_ids)); - } + std::optional>> + edge_partition_edgelist_compressed_srcs{}; + std::optional>> + edge_partition_edgelist_compressed_dsts{}; - if (edge_partition_edgelist_edge_types) { - 
rmm::device_uvector tmp_edge_types(edge_count, handle.get_stream()); - for (int j = 0; j < major_comm_size; ++j) { - for (size_t k = 0; k < edgelist_partitioned_dsts.size(); ++k) { - auto& input_buffer = (*edgelist_partitioned_edge_types)[k][i * major_comm_size + j]; - thrust::copy( - handle.get_thrust_policy(), - input_buffer.begin(), - input_buffer.end(), - tmp_edge_types.begin() + intra_partition_segment_offsets[j] + - intra_segment_copy_output_displacements[j * edgelist_partitioned_dsts.size() + k]); - input_buffer.resize(0, handle.get_stream()); - input_buffer.shrink_to_fit(handle.get_stream()); - } - } - (*edge_partition_edgelist_edge_types).push_back(std::move(tmp_edge_types)); - } + if (compressed_v_size < sizeof(vertex_t)) { + edge_partition_edgelist_compressed_srcs = + split_edge_chunk_compressed_elements_to_local_edge_partitions( + handle, + std::move(*edgelist_compressed_srcs), + edgelist_edge_offset_vectors, + edge_partition_edge_counts, + edge_partition_intra_partition_segment_offset_vectors, + edge_partition_intra_segment_copy_output_displacement_vectors, + compressed_v_size); + + edge_partition_edgelist_compressed_dsts = + split_edge_chunk_compressed_elements_to_local_edge_partitions( + handle, + std::move(*edgelist_compressed_dsts), + edgelist_edge_offset_vectors, + edge_partition_edge_counts, + edge_partition_intra_partition_segment_offset_vectors, + edge_partition_intra_segment_copy_output_displacement_vectors, + compressed_v_size); + } else { + edge_partition_edgelist_srcs = + split_edge_chunk_elements_to_local_edge_partitions( + handle, + std::move(edgelist_srcs), + edgelist_edge_offset_vectors, + edge_partition_edge_counts, + edge_partition_intra_partition_segment_offset_vectors, + edge_partition_intra_segment_copy_output_displacement_vectors); + + edge_partition_edgelist_dsts = + split_edge_chunk_elements_to_local_edge_partitions( + handle, + std::move(edgelist_dsts), + edgelist_edge_offset_vectors, + edge_partition_edge_counts, + 
edge_partition_intra_partition_segment_offset_vectors, + edge_partition_intra_segment_copy_output_displacement_vectors); + } + + if (edgelist_weights) { + edge_partition_edgelist_weights = + split_edge_chunk_elements_to_local_edge_partitions( + handle, + std::move(*edgelist_weights), + edgelist_edge_offset_vectors, + edge_partition_edge_counts, + edge_partition_intra_partition_segment_offset_vectors, + edge_partition_intra_segment_copy_output_displacement_vectors); + } + if (edgelist_edge_ids) { + edge_partition_edgelist_edge_ids = + split_edge_chunk_elements_to_local_edge_partitions( + handle, + std::move(*edgelist_edge_ids), + edgelist_edge_offset_vectors, + edge_partition_edge_counts, + edge_partition_intra_partition_segment_offset_vectors, + edge_partition_intra_segment_copy_output_displacement_vectors); + } + if (edgelist_edge_types) { + edge_partition_edgelist_edge_types = + split_edge_chunk_elements_to_local_edge_partitions( + handle, + std::move(*edgelist_edge_types), + edgelist_edge_offset_vectors, + edge_partition_edge_counts, + edge_partition_intra_partition_segment_offset_vectors, + edge_partition_intra_segment_copy_output_displacement_vectors); + } - edgelist_intra_partition_segment_offsets[i] = std::move(intra_partition_segment_offsets); + // 6. 
decompress edge chunk source/destination vertices to cut intermediate peak memory + // requirement + + if (compressed_v_size < sizeof(vertex_t)) { + assert(edge_partition_edgelist_compressed_srcs); + assert(edge_partition_edgelist_compressed_dsts); + + edge_partition_edgelist_srcs.reserve(minor_comm_size); + edge_partition_edgelist_dsts.reserve(minor_comm_size); + + for (int i = 0; i < minor_comm_size; ++i) { + rmm::device_uvector tmp_srcs(edge_partition_edge_counts[i], handle.get_stream()); + decompress_vertices( + handle, + raft::device_span((*edge_partition_edgelist_compressed_srcs)[i].data(), + (*edge_partition_edgelist_compressed_srcs)[i].size()), + raft::device_span(tmp_srcs.data(), tmp_srcs.size()), + compressed_v_size); + edge_partition_edgelist_srcs.push_back(std::move(tmp_srcs)); + (*edge_partition_edgelist_compressed_srcs)[i].resize(0, handle.get_stream()); + (*edge_partition_edgelist_compressed_srcs)[i].shrink_to_fit(handle.get_stream()); + + rmm::device_uvector tmp_dsts(edge_partition_edge_counts[i], handle.get_stream()); + decompress_vertices( + handle, + raft::device_span((*edge_partition_edgelist_compressed_dsts)[i].data(), + (*edge_partition_edgelist_compressed_dsts)[i].size()), + raft::device_span(tmp_dsts.data(), tmp_dsts.size()), + compressed_v_size); + edge_partition_edgelist_dsts.push_back(std::move(tmp_dsts)); + (*edge_partition_edgelist_compressed_dsts)[i].resize(0, handle.get_stream()); + (*edge_partition_edgelist_compressed_dsts)[i].shrink_to_fit(handle.get_stream()); + } } return create_graph_from_partitioned_edgelist(edgelist_srcs.data(), edgelist_srcs.size()), raft::device_span(edgelist_dsts.data(), edgelist_dsts.size()))), - "Invalid input arguments: graph_properties.is_symmetric is true but the input edge list is " + "Invalid input arguments: graph_properties.is_symmetric is true but the input edge " + "list is " "not symmetric."); } @@ -1377,7 +1527,8 @@ create_graph_from_edgelist_impl( handle, raft::device_span(edgelist_srcs.data(), 
edgelist_srcs.size()), raft::device_span(edgelist_dsts.data(), edgelist_dsts.size())), - "Invalid input arguments: graph_properties.is_multigraph is false but the input edge list " + "Invalid input arguments: graph_properties.is_multigraph is false but the input edge " + "list " "has parallel edges."); } } @@ -1605,7 +1756,8 @@ create_graph_from_edgelist_impl( cugraph::graph_meta_t{ num_vertices, graph_properties, - renumber ? std::optional>{meta.segment_offsets} : std::nullopt}), + renumber ? std::optional>{meta.segment_offsets} : std::nullopt, + meta.hypersparse_degree_offsets}), std::move(edge_weights), std::move(edge_ids), std::move(edge_types), @@ -1759,15 +1911,15 @@ create_graph_from_edgelist_impl( renumber); if (graph_properties.is_symmetric) { - CUGRAPH_EXPECTS( - (check_symmetric( - handle, - raft::device_span(aggregate_edgelist_srcs.data(), - aggregate_edgelist_srcs.size()), - raft::device_span(aggregate_edgelist_dsts.data(), - aggregate_edgelist_dsts.size()))), - "Invalid input arguments: graph_properties.is_symmetric is true but the input edge list is " - "not symmetric."); + CUGRAPH_EXPECTS((check_symmetric( + handle, + raft::device_span(aggregate_edgelist_srcs.data(), + aggregate_edgelist_srcs.size()), + raft::device_span(aggregate_edgelist_dsts.data(), + aggregate_edgelist_dsts.size()))), + "Invalid input arguments: graph_properties.is_symmetric is true but the " + "input edge list is " + "not symmetric."); } if (!graph_properties.is_multigraph) { @@ -1777,7 +1929,8 @@ create_graph_from_edgelist_impl( aggregate_edgelist_srcs.size()), raft::device_span(aggregate_edgelist_dsts.data(), aggregate_edgelist_dsts.size())), - "Invalid input arguments: graph_properties.is_multigraph is false but the input edge list " + "Invalid input arguments: graph_properties.is_multigraph is false but " + "the input edge list " "has parallel edges."); } } diff --git a/cpp/src/structure/detail/structure_utils.cuh b/cpp/src/structure/detail/structure_utils.cuh index 
1ef975c1dec..86e3c45ca2f 100644 --- a/cpp/src/structure/detail/structure_utils.cuh +++ b/cpp/src/structure/detail/structure_utils.cuh @@ -60,7 +60,8 @@ rmm::device_uvector compute_sparse_offsets( bool edgelist_major_sorted, rmm::cuda_stream_view stream_view) { - rmm::device_uvector offsets((major_range_last - major_range_first) + 1, stream_view); + rmm::device_uvector offsets(static_cast(major_range_last - major_range_first) + 1, + stream_view); if (edgelist_major_sorted) { offsets.set_element_to_zero_async(0, stream_view); thrust::upper_bound(rmm::exec_policy(stream_view), @@ -77,7 +78,9 @@ rmm::device_uvector compute_sparse_offsets( edgelist_major_first, edgelist_major_last, [offset_view, major_range_first] __device__(auto v) { - atomicAdd(&offset_view[v - major_range_first], edge_t{1}); + cuda::atomic_ref atomic_counter( + offset_view[v - major_range_first]); + atomic_counter.fetch_add(edge_t{1}, cuda::std::memory_order_relaxed); }); thrust::exclusive_scan( @@ -246,30 +249,112 @@ sort_and_compress_edgelist(rmm::device_uvector&& edgelist_srcs, rmm::device_uvector offsets(0, stream_view); rmm::device_uvector indices(0, stream_view); - auto edge_first = thrust::make_zip_iterator(edgelist_majors.begin(), edgelist_minors.begin()); if (edgelist_minors.size() > mem_frugal_threshold) { - offsets = compute_sparse_offsets(edgelist_majors.begin(), - edgelist_majors.end(), - major_range_first, - major_range_last, - false, - stream_view); + static_assert((sizeof(vertex_t) == 4) || (sizeof(vertex_t) == 8)); + if ((sizeof(vertex_t) == 8) && (static_cast(major_range_last - major_range_first) <= + static_cast(std::numeric_limits::max()))) { + rmm::device_uvector edgelist_major_offsets(edgelist_majors.size(), stream_view); + thrust::transform( + rmm::exec_policy_nosync(stream_view), + edgelist_majors.begin(), + edgelist_majors.end(), + edgelist_major_offsets.begin(), + cuda::proclaim_return_type([major_range_first] __device__(vertex_t major) { + return static_cast(major - 
major_range_first); + })); + edgelist_majors.resize(0, stream_view); + edgelist_majors.shrink_to_fit(stream_view); + + offsets = + compute_sparse_offsets(edgelist_major_offsets.begin(), + edgelist_major_offsets.end(), + uint32_t{0}, + static_cast(major_range_last - major_range_first), + false, + stream_view); + std::array pivots{}; + for (size_t i = 0; i < 3; ++i) { + pivots[i] = static_cast(thrust::distance( + offsets.begin(), + thrust::lower_bound(rmm::exec_policy(stream_view), + offsets.begin(), + offsets.end(), + static_cast((edgelist_major_offsets.size() * (i + 1)) / 4)))); + } - auto pivot = major_range_first + static_cast(thrust::distance( - offsets.begin(), - thrust::lower_bound(rmm::exec_policy(stream_view), - offsets.begin(), - offsets.end(), - edgelist_minors.size() / 2))); - auto second_first = - detail::mem_frugal_partition(edge_first, - edge_first + edgelist_minors.size(), - thrust_tuple_get, 0>{}, - pivot, - stream_view); - thrust::sort(rmm::exec_policy(stream_view), edge_first, second_first); - thrust::sort(rmm::exec_policy(stream_view), second_first, edge_first + edgelist_minors.size()); + auto pair_first = + thrust::make_zip_iterator(edgelist_major_offsets.begin(), edgelist_minors.begin()); + auto second_half_first = + detail::mem_frugal_partition(pair_first, + pair_first + edgelist_major_offsets.size(), + thrust_tuple_get, 0>{}, + pivots[1], + stream_view); + auto second_quarter_first = + detail::mem_frugal_partition(pair_first, + second_half_first, + thrust_tuple_get, 0>{}, + pivots[0], + stream_view); + auto last_quarter_first = + detail::mem_frugal_partition(second_half_first, + pair_first + edgelist_major_offsets.size(), + thrust_tuple_get, 0>{}, + pivots[2], + stream_view); + thrust::sort(rmm::exec_policy(stream_view), pair_first, second_quarter_first); + thrust::sort(rmm::exec_policy(stream_view), second_quarter_first, second_half_first); + thrust::sort(rmm::exec_policy(stream_view), second_half_first, last_quarter_first); + 
thrust::sort(rmm::exec_policy(stream_view), + last_quarter_first, + pair_first + edgelist_major_offsets.size()); + } else { + offsets = compute_sparse_offsets(edgelist_majors.begin(), + edgelist_majors.end(), + major_range_first, + major_range_last, + false, + stream_view); + std::array pivots{}; + for (size_t i = 0; i < 3; ++i) { + pivots[i] = + major_range_first + + static_cast(thrust::distance( + offsets.begin(), + thrust::lower_bound(rmm::exec_policy(stream_view), + offsets.begin(), + offsets.end(), + static_cast((edgelist_minors.size() * (i + 1)) / 4)))); + } + auto edge_first = thrust::make_zip_iterator(edgelist_majors.begin(), edgelist_minors.begin()); + auto second_half_first = + detail::mem_frugal_partition(edge_first, + edge_first + edgelist_majors.size(), + thrust_tuple_get, 0>{}, + pivots[1], + stream_view); + auto second_quarter_first = + detail::mem_frugal_partition(edge_first, + second_half_first, + thrust_tuple_get, 0>{}, + pivots[0], + stream_view); + auto last_quarter_first = + detail::mem_frugal_partition(second_half_first, + edge_first + edgelist_majors.size(), + thrust_tuple_get, 0>{}, + pivots[2], + stream_view); + thrust::sort(rmm::exec_policy(stream_view), edge_first, second_quarter_first); + thrust::sort(rmm::exec_policy(stream_view), second_quarter_first, second_half_first); + thrust::sort(rmm::exec_policy(stream_view), second_half_first, last_quarter_first); + thrust::sort( + rmm::exec_policy(stream_view), last_quarter_first, edge_first + edgelist_majors.size()); + edgelist_majors.resize(0, stream_view); + edgelist_majors.shrink_to_fit(stream_view); + } } else { + auto edge_first = thrust::make_zip_iterator(edgelist_majors.begin(), edgelist_minors.begin()); thrust::sort(rmm::exec_policy(stream_view), edge_first, edge_first + edgelist_minors.size()); offsets = compute_sparse_offsets(edgelist_majors.begin(), edgelist_majors.end(), @@ -277,12 +362,11 @@ sort_and_compress_edgelist(rmm::device_uvector&& edgelist_srcs, major_range_last, true, 
stream_view); + edgelist_majors.resize(0, stream_view); + edgelist_majors.shrink_to_fit(stream_view); } indices = std::move(edgelist_minors); - edgelist_majors.resize(0, stream_view); - edgelist_majors.shrink_to_fit(stream_view); - std::optional> dcs_nzd_vertices{std::nullopt}; if (major_hypersparse_first) { std::tie(offsets, dcs_nzd_vertices) = compress_hypersparse_offsets(std::move(offsets), diff --git a/cpp/src/structure/graph_impl.cuh b/cpp/src/structure/graph_impl.cuh index ef43b7b13ec..6661f0488d8 100644 --- a/cpp/src/structure/graph_impl.cuh +++ b/cpp/src/structure/graph_impl.cuh @@ -146,8 +146,7 @@ update_local_sorted_unique_edge_majors_minors( auto num_segments_per_vertex_partition = static_cast(meta.edge_partition_segment_offsets.size() / minor_comm_size); - auto use_dcs = - num_segments_per_vertex_partition > (detail::num_sparse_segments_per_vertex_partition + 2); + auto use_dcs = edge_partition_dcs_nzd_vertices.has_value(); std::optional>> local_sorted_unique_edge_majors{ std::nullopt}; @@ -166,14 +165,15 @@ update_local_sorted_unique_edge_majors_minors( // 1. 
Update local_sorted_unique_edge_minors & local_sorted_unique_edge_minor_offsets - { + if (detail::edge_partition_src_dst_property_values_kv_pair_fill_ratio_threshold > 0.0) { auto [minor_range_first, minor_range_last] = meta.partition.local_edge_partition_minor_range(); auto minor_range_size = meta.partition.local_edge_partition_minor_range_size(); - rmm::device_uvector minor_bitmaps( - (minor_range_size + (sizeof(uint32_t) * 8 - 1)) / (sizeof(uint32_t) * 8), - handle.get_stream()); - thrust::fill( - handle.get_thrust_policy(), minor_bitmaps.begin(), minor_bitmaps.end(), uint32_t{0}); + rmm::device_uvector minor_bitmaps(packed_bool_size(minor_range_size), + handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + minor_bitmaps.begin(), + minor_bitmaps.end(), + packed_bool_empty_mask()); for (size_t i = 0; i < edge_partition_indices.size(); ++i) { thrust::for_each(handle.get_thrust_policy(), edge_partition_indices[i].begin(), @@ -281,92 +281,96 @@ update_local_sorted_unique_edge_majors_minors( // 2. 
Update local_sorted_unique_edge_majors & local_sorted_unique_edge_major_offsets - std::vector num_local_unique_edge_major_counts(edge_partition_offsets.size()); - for (size_t i = 0; i < edge_partition_offsets.size(); ++i) { - num_local_unique_edge_major_counts[i] += thrust::count_if( - handle.get_thrust_policy(), - thrust::make_counting_iterator(vertex_t{0}), - thrust::make_counting_iterator(static_cast(edge_partition_offsets[i].size() - 1)), - has_nzd_t{edge_partition_offsets[i].data(), vertex_t{0}}); - } - auto num_local_unique_edge_majors = std::reduce(num_local_unique_edge_major_counts.begin(), - num_local_unique_edge_major_counts.end()); - - vertex_t aggregate_major_range_size{0}; - for (size_t i = 0; i < meta.partition.number_of_local_edge_partitions(); ++i) { - aggregate_major_range_size += meta.partition.local_edge_partition_major_range_size(i); - } - - auto max_major_properties_fill_ratio = - host_scalar_allreduce(comm, - static_cast(num_local_unique_edge_majors) / - static_cast(aggregate_major_range_size), - raft::comms::op_t::MAX, - handle.get_stream()); + if (detail::edge_partition_src_dst_property_values_kv_pair_fill_ratio_threshold > 0.0) { + std::vector num_local_unique_edge_major_counts(edge_partition_offsets.size()); + for (size_t i = 0; i < edge_partition_offsets.size(); ++i) { + num_local_unique_edge_major_counts[i] = thrust::count_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(vertex_t{0}), + thrust::make_counting_iterator(static_cast(edge_partition_offsets[i].size() - 1)), + has_nzd_t{edge_partition_offsets[i].data(), vertex_t{0}}); + } + auto num_local_unique_edge_majors = std::reduce(num_local_unique_edge_major_counts.begin(), + num_local_unique_edge_major_counts.end()); - if (max_major_properties_fill_ratio < - detail::edge_partition_src_dst_property_values_kv_pair_fill_ratio_threshold) { - auto const chunk_size = - static_cast(std::min(1.0 / max_major_properties_fill_ratio, 1024.0)); + vertex_t 
aggregate_major_range_size{0}; + for (size_t i = 0; i < meta.partition.number_of_local_edge_partitions(); ++i) { + aggregate_major_range_size += meta.partition.local_edge_partition_major_range_size(i); + } - local_sorted_unique_edge_majors = std::vector>{}; - local_sorted_unique_edge_major_chunk_start_offsets = - std::vector>{}; + auto max_major_properties_fill_ratio = + host_scalar_allreduce(comm, + static_cast(num_local_unique_edge_majors) / + static_cast(aggregate_major_range_size), + raft::comms::op_t::MAX, + handle.get_stream()); - (*local_sorted_unique_edge_majors).reserve(edge_partition_offsets.size()); - (*local_sorted_unique_edge_major_chunk_start_offsets).reserve(edge_partition_offsets.size()); - for (size_t i = 0; i < edge_partition_offsets.size(); ++i) { - auto [major_range_first, major_range_last] = - meta.partition.local_edge_partition_major_range(i); - auto sparse_range_last = - use_dcs - ? (major_range_first + - meta.edge_partition_segment_offsets[num_segments_per_vertex_partition * i + + if (max_major_properties_fill_ratio < + detail::edge_partition_src_dst_property_values_kv_pair_fill_ratio_threshold) { + auto const chunk_size = + static_cast(std::min(1.0 / max_major_properties_fill_ratio, 1024.0)); + + local_sorted_unique_edge_majors = std::vector>{}; + local_sorted_unique_edge_major_chunk_start_offsets = + std::vector>{}; + + (*local_sorted_unique_edge_majors).reserve(edge_partition_offsets.size()); + (*local_sorted_unique_edge_major_chunk_start_offsets).reserve(edge_partition_offsets.size()); + for (size_t i = 0; i < edge_partition_offsets.size(); ++i) { + auto [major_range_first, major_range_last] = + meta.partition.local_edge_partition_major_range(i); + auto sparse_range_last = + use_dcs + ? 
(major_range_first + + meta + .edge_partition_segment_offsets[num_segments_per_vertex_partition * i + detail::num_sparse_segments_per_vertex_partition]) - : major_range_last; - - rmm::device_uvector unique_edge_majors(num_local_unique_edge_major_counts[i], - handle.get_stream()); - CUGRAPH_EXPECTS( - sparse_range_last - major_range_first < std::numeric_limits::max(), - "copy_if will fail (https://github.com/NVIDIA/thrust/issues/1302), work-around required."); - auto cur_size = thrust::distance( - unique_edge_majors.begin(), - thrust::copy_if( - handle.get_thrust_policy(), - thrust::make_counting_iterator(major_range_first), - thrust::make_counting_iterator(sparse_range_last), + : major_range_last; + + rmm::device_uvector unique_edge_majors(num_local_unique_edge_major_counts[i], + handle.get_stream()); + CUGRAPH_EXPECTS(sparse_range_last - major_range_first < std::numeric_limits::max(), + "copy_if will fail (https://github.com/NVIDIA/thrust/issues/1302), " + "work-around required."); + auto cur_size = thrust::distance( unique_edge_majors.begin(), - has_nzd_t{edge_partition_offsets[i].data(), major_range_first})); - if (use_dcs) { - thrust::copy(handle.get_thrust_policy(), - (*edge_partition_dcs_nzd_vertices)[i].begin(), - (*edge_partition_dcs_nzd_vertices)[i].end(), - unique_edge_majors.begin() + cur_size); + thrust::copy_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(major_range_first), + thrust::make_counting_iterator(sparse_range_last), + unique_edge_majors.begin(), + has_nzd_t{edge_partition_offsets[i].data(), major_range_first})); + if (use_dcs) { + thrust::copy(handle.get_thrust_policy(), + (*edge_partition_dcs_nzd_vertices)[i].begin(), + (*edge_partition_dcs_nzd_vertices)[i].end(), + unique_edge_majors.begin() + cur_size); + } + + auto num_chunks = static_cast( + ((major_range_last - major_range_first) + (chunk_size - size_t{1})) / chunk_size); + rmm::device_uvector unique_edge_major_chunk_start_offsets(num_chunks + size_t{1}, + 
handle.get_stream()); + + auto chunk_start_vertex_first = + thrust::make_transform_iterator(thrust::make_counting_iterator(vertex_t{0}), + detail::multiply_and_add_t{ + static_cast(chunk_size), major_range_first}); + thrust::lower_bound(handle.get_thrust_policy(), + unique_edge_majors.begin(), + unique_edge_majors.end(), + chunk_start_vertex_first, + chunk_start_vertex_first + num_chunks, + unique_edge_major_chunk_start_offsets.begin()); + unique_edge_major_chunk_start_offsets.set_element( + num_chunks, static_cast(unique_edge_majors.size()), handle.get_stream()); + + (*local_sorted_unique_edge_majors).push_back(std::move(unique_edge_majors)); + (*local_sorted_unique_edge_major_chunk_start_offsets) + .push_back(std::move(unique_edge_major_chunk_start_offsets)); } - - auto num_chunks = static_cast( - ((major_range_last - major_range_first) + (chunk_size - size_t{1})) / chunk_size); - rmm::device_uvector unique_edge_major_chunk_start_offsets(num_chunks + size_t{1}, - handle.get_stream()); - - auto chunk_start_vertex_first = thrust::make_transform_iterator( - thrust::make_counting_iterator(vertex_t{0}), - detail::multiply_and_add_t{static_cast(chunk_size), major_range_first}); - thrust::lower_bound(handle.get_thrust_policy(), - unique_edge_majors.begin(), - unique_edge_majors.end(), - chunk_start_vertex_first, - chunk_start_vertex_first + num_chunks, - unique_edge_major_chunk_start_offsets.begin()); - unique_edge_major_chunk_start_offsets.set_element( - num_chunks, static_cast(unique_edge_majors.size()), handle.get_stream()); - - (*local_sorted_unique_edge_majors).push_back(std::move(unique_edge_majors)); - (*local_sorted_unique_edge_major_chunk_start_offsets) - .push_back(std::move(unique_edge_major_chunk_start_offsets)); + local_sorted_unique_edge_major_chunk_size = chunk_size; } - local_sorted_unique_edge_major_chunk_size = chunk_size; } return std::make_tuple(std::move(local_sorted_unique_edge_majors), @@ -378,6 +382,50 @@ 
update_local_sorted_unique_edge_majors_minors( std::move(local_sorted_unique_edge_minor_vertex_partition_offsets)); } +template +std::enable_if_t>> +compute_edge_partition_dcs_nzd_range_bitmaps( + raft::handle_t const& handle, + graph_meta_t const& meta, + std::vector> const& edge_partition_dcs_nzd_vertices) +{ + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto num_segments_per_vertex_partition = + static_cast(meta.edge_partition_segment_offsets.size() / minor_comm_size); + + std::vector> edge_partition_dcs_nzd_range_bitmaps{}; + edge_partition_dcs_nzd_range_bitmaps.reserve(edge_partition_dcs_nzd_vertices.size()); + for (size_t i = 0; i < edge_partition_dcs_nzd_vertices.size(); ++i) { + raft::host_span segment_offsets( + meta.edge_partition_segment_offsets.data() + num_segments_per_vertex_partition * i, + num_segments_per_vertex_partition); + rmm::device_uvector bitmap( + packed_bool_size(segment_offsets[detail::num_sparse_segments_per_vertex_partition + 1] - + segment_offsets[detail::num_sparse_segments_per_vertex_partition]), + handle.get_stream()); + thrust::fill( + handle.get_thrust_policy(), bitmap.begin(), bitmap.end(), packed_bool_empty_mask()); + auto major_range_first = meta.partition.local_edge_partition_major_range_first(i); + auto major_hypersparse_first = + major_range_first + segment_offsets[detail::num_sparse_segments_per_vertex_partition]; + thrust::for_each(handle.get_thrust_policy(), + edge_partition_dcs_nzd_vertices[i].begin(), + edge_partition_dcs_nzd_vertices[i].end(), + [bitmap = raft::device_span(bitmap.data(), bitmap.size()), + major_hypersparse_first] __device__(auto major) { + auto offset = major - major_hypersparse_first; + cuda::atomic_ref word( + bitmap[packed_bool_offset(offset)]); + word.fetch_or(packed_bool_mask(offset), cuda::std::memory_order_relaxed); + }); + edge_partition_dcs_nzd_range_bitmaps.push_back(std::move(bitmap)); + } + + 
return edge_partition_dcs_nzd_range_bitmaps; +} + } // namespace template @@ -400,7 +448,8 @@ graph_t @@ -452,7 +506,8 @@ graph_t(indices.size()), meta.properties), offsets_(std::move(offsets)), indices_(std::move(indices)), - segment_offsets_(meta.segment_offsets) + segment_offsets_(meta.segment_offsets), + hypersparse_degree_offsets_(meta.hypersparse_degree_offsets) { } diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index f925a142737..31de9b1e5d3 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -488,14 +488,18 @@ graph_view_t> const& edge_partition_indices, std::optional>> const& edge_partition_dcs_nzd_vertices, + std::optional>> const& + edge_partition_dcs_nzd_range_bitmaps, graph_view_meta_t meta) : detail::graph_base_t( meta.number_of_vertices, meta.number_of_edges, meta.properties), edge_partition_offsets_(edge_partition_offsets), edge_partition_indices_(edge_partition_indices), edge_partition_dcs_nzd_vertices_(edge_partition_dcs_nzd_vertices), + edge_partition_dcs_nzd_range_bitmaps_(edge_partition_dcs_nzd_range_bitmaps), partition_(meta.partition), edge_partition_segment_offsets_(meta.edge_partition_segment_offsets), + edge_partition_hypersparse_degree_offsets_(meta.edge_partition_hypersparse_degree_offsets), local_sorted_unique_edge_srcs_(meta.local_sorted_unique_edge_srcs), local_sorted_unique_edge_src_chunk_start_offsets_( meta.local_sorted_unique_edge_src_chunk_start_offsets), @@ -538,7 +542,8 @@ graph_view_t #include #include -#ifdef TIMING -#include -#endif #include @@ -127,10 +122,6 @@ extract_induced_subgraphs( raft::device_span subgraph_vertices, bool do_expensive_check) { -#ifdef TIMING - HighResTimer hr_timer; - hr_timer.start("extract_induced_subgraphs"); -#endif // 1. 
check input arguments if (do_expensive_check) { @@ -281,10 +272,6 @@ extract_induced_subgraphs( true, handle.get_stream()); -#ifdef TIMING - hr_timer.stop(); - hr_timer.display_and_clear(std::cout); -#endif return std::make_tuple(std::move(edge_majors), std::move(edge_minors), std::move(edge_weights), diff --git a/cpp/src/structure/renumber_edgelist_impl.cuh b/cpp/src/structure/renumber_edgelist_impl.cuh index 41f81d72ab1..bd7d48ac314 100644 --- a/cpp/src/structure/renumber_edgelist_impl.cuh +++ b/cpp/src/structure/renumber_edgelist_impl.cuh @@ -51,6 +51,8 @@ #include #include +#include + #include #include #include @@ -233,128 +235,299 @@ std::optional find_locally_unused_ext_vertex_id( : std::nullopt /* if the entire range of vertex_t is used */; } -// returns renumber map and segment_offsets +// returns renumber map, segment_offsets, and hypersparse_degree_offsets template -std::tuple, std::vector, vertex_t> compute_renumber_map( - raft::handle_t const& handle, - std::optional>&& local_vertices, - std::vector const& edgelist_majors, - std::vector const& edgelist_minors, - std::vector const& edgelist_edge_counts) +std::tuple, + std::vector, + std::optional>, + vertex_t> +compute_renumber_map(raft::handle_t const& handle, + std::optional>&& local_vertices, + std::vector const& edgelist_majors, + std::vector const& edgelist_minors, + std::vector const& edgelist_edge_counts) { - rmm::device_uvector sorted_local_vertices(0, handle.get_stream()); - - edge_t num_local_edges = std::reduce(edgelist_edge_counts.begin(), edgelist_edge_counts.end()); - - // 1. if local_vertices.has_value() is false, find unique vertices from edge majors (to construct - // local_vertices) + // 1. 
if local_vertices.has_value() is false, find unique vertices from edge majors & minors (to + // construct local_vertices) - rmm::device_uvector sorted_unique_majors(0, handle.get_stream()); + rmm::device_uvector sorted_local_vertices(0, handle.get_stream()); if (!local_vertices) { - sorted_unique_majors.resize(num_local_edges, handle.get_stream()); - size_t major_offset{0}; - for (size_t i = 0; i < edgelist_majors.size(); ++i) { - thrust::copy(handle.get_thrust_policy(), - edgelist_majors[i], - edgelist_majors[i] + edgelist_edge_counts[i], - sorted_unique_majors.begin() + major_offset); - thrust::sort(handle.get_thrust_policy(), - sorted_unique_majors.begin() + major_offset, - sorted_unique_majors.begin() + major_offset + edgelist_edge_counts[i]); - major_offset += static_cast(thrust::distance( - sorted_unique_majors.begin() + major_offset, - thrust::unique(handle.get_thrust_policy(), - sorted_unique_majors.begin() + major_offset, - sorted_unique_majors.begin() + major_offset + edgelist_edge_counts[i]))); + constexpr size_t num_bins{ + 8}; // increase the number of bins to cut peak memory usage (at the expense of additional + // computing), limit the maximum temporary memory usage to "size of local edge list + // majors|minors * 2 / # bins" + constexpr uint32_t hash_seed = + 1; // shouldn't be 0 (in that case this hash function will coincide with the hash function + // used to map vertices to GPUs, and we may not see the expected randomization) + + auto edge_major_count_vectors = num_bins > 1 + ? 
std::make_optional>>( + edgelist_majors.size(), std::vector(num_bins)) + : std::nullopt; + if (edge_major_count_vectors) { + for (size_t i = 0; i < edgelist_majors.size(); ++i) { + rmm::device_uvector d_edge_major_counts(num_bins, handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + d_edge_major_counts.begin(), + d_edge_major_counts.end(), + edge_t{0}); + thrust::for_each( + handle.get_thrust_policy(), + edgelist_majors[i], + edgelist_majors[i] + edgelist_edge_counts[i], + [counts = raft::device_span(d_edge_major_counts.data(), + d_edge_major_counts.size())] __device__(auto v) { + cuco::detail::MurmurHash3_32 hash_func{hash_seed}; + cuda::atomic_ref atomic_counter( + counts[hash_func(v) % num_bins]); + atomic_counter.fetch_add(edge_t{1}, cuda::std::memory_order_relaxed); + }); + raft::update_host((*edge_major_count_vectors)[i].data(), + d_edge_major_counts.data(), + d_edge_major_counts.size(), + handle.get_stream()); + } } - sorted_unique_majors.resize(major_offset, handle.get_stream()); - if (edgelist_majors.size() > 1) { - thrust::sort( - handle.get_thrust_policy(), sorted_unique_majors.begin(), sorted_unique_majors.end()); + auto edge_minor_count_vectors = num_bins > 1 + ? 
std::make_optional>>( + edgelist_minors.size(), std::vector(num_bins)) + : std::nullopt; + if (edge_minor_count_vectors) { + for (size_t i = 0; i < edgelist_minors.size(); ++i) { + rmm::device_uvector d_edge_minor_counts(num_bins, handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + d_edge_minor_counts.begin(), + d_edge_minor_counts.end(), + edge_t{0}); + thrust::for_each( + handle.get_thrust_policy(), + edgelist_minors[i], + edgelist_minors[i] + edgelist_edge_counts[i], + [counts = raft::device_span(d_edge_minor_counts.data(), + d_edge_minor_counts.size())] __device__(auto v) { + cuco::detail::MurmurHash3_32 hash_func{hash_seed}; + cuda::atomic_ref atomic_counter( + counts[hash_func(v) % num_bins]); + atomic_counter.fetch_add(edge_t{1}, cuda::std::memory_order_relaxed); + }); + raft::update_host((*edge_minor_count_vectors)[i].data(), + d_edge_minor_counts.data(), + d_edge_minor_counts.size(), + handle.get_stream()); + } } - sorted_unique_majors.shrink_to_fit(handle.get_stream()); - } - - // 2. 
if local_vertices.has_value() is false, find unique vertices from edge minors (to construct - // local_vertices) - rmm::device_uvector sorted_unique_minors(0, handle.get_stream()); - if (!local_vertices) { - sorted_unique_minors.resize(num_local_edges, handle.get_stream()); - size_t minor_offset{0}; - for (size_t i = 0; i < edgelist_minors.size(); ++i) { - thrust::copy(handle.get_thrust_policy(), - edgelist_minors[i], - edgelist_minors[i] + edgelist_edge_counts[i], - sorted_unique_minors.begin() + minor_offset); - thrust::sort(handle.get_thrust_policy(), - sorted_unique_minors.begin() + minor_offset, - sorted_unique_minors.begin() + minor_offset + edgelist_edge_counts[i]); - minor_offset += static_cast(thrust::distance( - sorted_unique_minors.begin() + minor_offset, - thrust::unique(handle.get_thrust_policy(), - sorted_unique_minors.begin() + minor_offset, - sorted_unique_minors.begin() + minor_offset + edgelist_edge_counts[i]))); - } - sorted_unique_minors.resize(minor_offset, handle.get_stream()); - if (edgelist_minors.size() > 1) { - thrust::sort( - handle.get_thrust_policy(), sorted_unique_minors.begin(), sorted_unique_minors.end()); - sorted_unique_minors.resize(thrust::distance(sorted_unique_minors.begin(), - thrust::unique(handle.get_thrust_policy(), - sorted_unique_minors.begin(), - sorted_unique_minors.end())), - handle.get_stream()); - } - sorted_unique_minors.shrink_to_fit(handle.get_stream()); - } + handle.sync_stream(); - // 3. update sorted_local_vertices. 
- // if local_vertices.has_value() is false, reconstruct local_vertices first + for (size_t i = 0; i < num_bins; ++i) { + rmm::device_uvector this_bin_sorted_unique_majors(0, handle.get_stream()); + { + std::vector> edge_partition_tmp_majors{}; // for bin "i" + edge_partition_tmp_majors.reserve(edgelist_majors.size()); + for (size_t j = 0; j < edgelist_majors.size(); ++j) { + rmm::device_uvector tmp_majors(0, handle.get_stream()); + if (num_bins > 1) { + tmp_majors.resize((*edge_major_count_vectors)[j][i], handle.get_stream()); + thrust::copy_if(handle.get_thrust_policy(), + edgelist_majors[j], + edgelist_majors[j] + edgelist_edge_counts[j], + tmp_majors.begin(), + [i] __device__(auto v) { + cuco::detail::MurmurHash3_32 hash_func{hash_seed}; + return (static_cast(hash_func(v) % num_bins) == i); + }); + } else { + tmp_majors.resize(edgelist_edge_counts[j], handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + edgelist_majors[j], + edgelist_majors[j] + edgelist_edge_counts[j], + tmp_majors.begin()); + } + thrust::sort(handle.get_thrust_policy(), tmp_majors.begin(), tmp_majors.end()); + tmp_majors.resize( + thrust::distance( + tmp_majors.begin(), + thrust::unique(handle.get_thrust_policy(), tmp_majors.begin(), tmp_majors.end())), + handle.get_stream()); + tmp_majors.shrink_to_fit(handle.get_stream()); + + edge_partition_tmp_majors.push_back(std::move(tmp_majors)); + } + if constexpr (multi_gpu) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + if (minor_comm_size > 1) { + std::vector tx_counts(minor_comm_size); + for (int j = 0; j < minor_comm_size; ++j) { + tx_counts[j] = edge_partition_tmp_majors[j].size(); + } + this_bin_sorted_unique_majors.resize(std::reduce(tx_counts.begin(), tx_counts.end()), + handle.get_stream()); + size_t output_offset{0}; + for (size_t j = 0; j < edge_partition_tmp_majors.size(); ++j) { + thrust::copy(handle.get_thrust_policy(), + 
edge_partition_tmp_majors[j].begin(), + edge_partition_tmp_majors[j].end(), + this_bin_sorted_unique_majors.begin() + output_offset); + output_offset += edge_partition_tmp_majors[j].size(); + } + this_bin_sorted_unique_majors = shuffle_and_unique_segment_sorted_values( + minor_comm, this_bin_sorted_unique_majors.begin(), tx_counts, handle.get_stream()); + } else { + this_bin_sorted_unique_majors = std::move(edge_partition_tmp_majors[0]); + } + } else { + this_bin_sorted_unique_majors = std::move(edge_partition_tmp_majors[0]); + } + } - if (local_vertices) { + rmm::device_uvector this_bin_sorted_unique_minors(0, handle.get_stream()); + { + std::vector> edge_partition_tmp_minors{}; // for bin "i" + edge_partition_tmp_minors.reserve(edgelist_minors.size()); + for (size_t j = 0; j < edgelist_minors.size(); ++j) { + rmm::device_uvector tmp_minors(0, handle.get_stream()); + if (num_bins > 1) { + tmp_minors.resize((*edge_minor_count_vectors)[j][i], handle.get_stream()); + thrust::copy_if(handle.get_thrust_policy(), + edgelist_minors[j], + edgelist_minors[j] + edgelist_edge_counts[j], + tmp_minors.begin(), + [i] __device__(auto v) { + cuco::detail::MurmurHash3_32 hash_func{hash_seed}; + return (static_cast(hash_func(v) % num_bins) == i); + }); + } else { + tmp_minors.resize(edgelist_edge_counts[j], handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + edgelist_minors[j], + edgelist_minors[j] + edgelist_edge_counts[j], + tmp_minors.begin()); + } + thrust::sort(handle.get_thrust_policy(), tmp_minors.begin(), tmp_minors.end()); + tmp_minors.resize( + thrust::distance( + tmp_minors.begin(), + thrust::unique(handle.get_thrust_policy(), tmp_minors.begin(), tmp_minors.end())), + handle.get_stream()); + tmp_minors.shrink_to_fit(handle.get_stream()); + + edge_partition_tmp_minors.push_back(std::move(tmp_minors)); + } + if (edge_partition_tmp_minors.size() == 1) { + this_bin_sorted_unique_minors = std::move(edge_partition_tmp_minors[0]); + } else { + edge_t 
aggregate_size{0}; + for (size_t j = 0; j < edge_partition_tmp_minors.size(); ++j) { + aggregate_size += edge_partition_tmp_minors[j].size(); + } + this_bin_sorted_unique_minors.resize(aggregate_size, handle.get_stream()); + size_t output_offset{0}; + for (size_t j = 0; j < edge_partition_tmp_minors.size(); ++j) { + thrust::copy(handle.get_thrust_policy(), + edge_partition_tmp_minors[j].begin(), + edge_partition_tmp_minors[j].end(), + this_bin_sorted_unique_minors.begin() + output_offset); + output_offset += edge_partition_tmp_minors[j].size(); + } + edge_partition_tmp_minors.clear(); + thrust::sort(handle.get_thrust_policy(), + this_bin_sorted_unique_minors.begin(), + this_bin_sorted_unique_minors.end()); + this_bin_sorted_unique_minors.resize( + thrust::distance(this_bin_sorted_unique_minors.begin(), + thrust::unique(handle.get_thrust_policy(), + this_bin_sorted_unique_minors.begin(), + this_bin_sorted_unique_minors.end())), + handle.get_stream()); + this_bin_sorted_unique_minors.shrink_to_fit(handle.get_stream()); + } + if constexpr (multi_gpu) { + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + if (major_comm_size > 1) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + compute_gpu_id_from_ext_vertex_t gpu_id_func{ + comm_size, major_comm_size, minor_comm_size}; + auto d_tx_counts = groupby_and_count( + this_bin_sorted_unique_minors.begin(), + this_bin_sorted_unique_minors.end(), + [major_comm_size, minor_comm_size, gpu_id_func] __device__(auto v) { + return partition_manager::compute_major_comm_rank_from_global_comm_rank( + major_comm_size, minor_comm_size, gpu_id_func(v)); + }, + major_comm_size, + std::numeric_limits::max(), + handle.get_stream()); + std::vector h_tx_counts(d_tx_counts.size()); + 
raft::update_host( + h_tx_counts.data(), d_tx_counts.data(), d_tx_counts.size(), handle.get_stream()); + handle.sync_stream(); + std::vector tx_displacements(h_tx_counts.size()); + std::exclusive_scan( + h_tx_counts.begin(), h_tx_counts.end(), tx_displacements.begin(), size_t{0}); + for (int j = 0; j < major_comm_size; ++j) { + thrust::sort( + handle.get_thrust_policy(), + this_bin_sorted_unique_minors.begin() + tx_displacements[j], + this_bin_sorted_unique_minors.begin() + (tx_displacements[j] + h_tx_counts[j])); + } + this_bin_sorted_unique_minors = shuffle_and_unique_segment_sorted_values( + major_comm, this_bin_sorted_unique_minors.begin(), h_tx_counts, handle.get_stream()); + } + } + } + rmm::device_uvector this_bin_sorted_unique_vertices(0, handle.get_stream()); + { + rmm::device_uvector merged_vertices( + this_bin_sorted_unique_majors.size() + this_bin_sorted_unique_minors.size(), + handle.get_stream()); + thrust::merge(handle.get_thrust_policy(), + this_bin_sorted_unique_majors.begin(), + this_bin_sorted_unique_majors.end(), + this_bin_sorted_unique_minors.begin(), + this_bin_sorted_unique_minors.end(), + merged_vertices.begin()); + this_bin_sorted_unique_majors.resize(0, handle.get_stream()); + this_bin_sorted_unique_majors.shrink_to_fit(handle.get_stream()); + this_bin_sorted_unique_minors.resize(0, handle.get_stream()); + this_bin_sorted_unique_minors.shrink_to_fit(handle.get_stream()); + merged_vertices.resize(thrust::distance(merged_vertices.begin(), + thrust::unique(handle.get_thrust_policy(), + merged_vertices.begin(), + merged_vertices.end())), + handle.get_stream()); + merged_vertices.shrink_to_fit(handle.get_stream()); + this_bin_sorted_unique_vertices = std::move(merged_vertices); + } + if (sorted_local_vertices.size() == 0) { + sorted_local_vertices = std::move(this_bin_sorted_unique_vertices); + } else { + rmm::device_uvector merged_vertices( + sorted_local_vertices.size() + this_bin_sorted_unique_vertices.size(), + handle.get_stream()); + 
thrust::merge(handle.get_thrust_policy(), + sorted_local_vertices.begin(), + sorted_local_vertices.end(), + this_bin_sorted_unique_vertices.begin(), + this_bin_sorted_unique_vertices.end(), + merged_vertices.begin()); // merging two unique sets from different hash + // bins, so the merged set can't have duplicates + sorted_local_vertices = std::move(merged_vertices); + } + } + } else { sorted_local_vertices = std::move(*local_vertices); thrust::sort( handle.get_thrust_policy(), sorted_local_vertices.begin(), sorted_local_vertices.end()); - } else { - sorted_local_vertices.resize(sorted_unique_majors.size() + sorted_unique_minors.size(), - handle.get_stream()); - - thrust::merge(handle.get_thrust_policy(), - sorted_unique_majors.begin(), - sorted_unique_majors.end(), - sorted_unique_minors.begin(), - sorted_unique_minors.end(), - sorted_local_vertices.begin()); - - sorted_unique_majors.resize(0, handle.get_stream()); - sorted_unique_majors.shrink_to_fit(handle.get_stream()); - sorted_unique_minors.resize(0, handle.get_stream()); - sorted_unique_minors.shrink_to_fit(handle.get_stream()); - - sorted_local_vertices.resize(thrust::distance(sorted_local_vertices.begin(), - thrust::unique(handle.get_thrust_policy(), - sorted_local_vertices.begin(), - sorted_local_vertices.end())), - handle.get_stream()); - sorted_local_vertices.shrink_to_fit(handle.get_stream()); - - if constexpr (multi_gpu) { - sorted_local_vertices = - cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( - handle, std::move(sorted_local_vertices)); - thrust::sort( - handle.get_thrust_policy(), sorted_local_vertices.begin(), sorted_local_vertices.end()); - sorted_local_vertices.resize(thrust::distance(sorted_local_vertices.begin(), - thrust::unique(handle.get_thrust_policy(), - sorted_local_vertices.begin(), - sorted_local_vertices.end())), - handle.get_stream()); - sorted_local_vertices.shrink_to_fit(handle.get_stream()); - } } + // 2. 
find an unused vertex ID + auto locally_unused_vertex_id = find_locally_unused_ext_vertex_id( handle, raft::device_span(sorted_local_vertices.data(), sorted_local_vertices.size()), @@ -363,17 +536,9 @@ std::tuple, std::vector, vertex_t> compu "Invalid input arguments: there is no unused value in the entire range of " "vertex_t, increase vertex_t to 64 bit."); - // 4. compute global degrees for the sorted local vertices + // 3. compute global degrees for the sorted local vertices rmm::device_uvector sorted_local_vertex_degrees(0, handle.get_stream()); - std::optional> stream_pool_indices{ - std::nullopt}; // FIXME: move this inside the if statement - - auto constexpr num_chunks = size_t{ - 2}; // tuning parameter, this trade-offs # binary searches (up to num_chunks times more binary - // searches can be necessary if num_unique_majors << edgelist_edge_counts[i]) and temporary - // buffer requirement (cut by num_chunks times), currently set to 2 to avoid peak memory - // usage happening in this part (especially when minor_comm_size is small) if constexpr (multi_gpu) { auto& comm = handle.get_comms(); @@ -386,94 +551,37 @@ std::tuple, std::vector, vertex_t> compu auto edge_partition_major_range_sizes = host_scalar_allgather(minor_comm, sorted_local_vertices.size(), handle.get_stream()); - if ((minor_comm_size >= 2) && (handle.get_stream_pool_size() >= 2)) { - auto vertex_edge_counts = host_scalar_allreduce( - comm, - thrust::make_tuple(static_cast(sorted_local_vertices.size()), num_local_edges), - raft::comms::op_t::SUM, - handle.get_stream()); - // memory footprint vs parallelism trade-off - // peak memory requirement per loop is approximately - // (V/P) * (sizeof(vertex_t) + sizeof(edge_t)) + - // (E / (comm_size * minor_comm_size)) / num_chunks * sizeof(vertex_t) * 2 + - // std::min(V/P, (E / (comm_size * minor_comm_size)) / num_chunks) * (sizeof(vertex_t) + - // sizeof(edge_t)) - // and limit temporary memory requirement to (E / comm_size) * sizeof(vertex_t) - auto 
avg_vertex_degree = thrust::get<0>(vertex_edge_counts) > 0 - ? static_cast(thrust::get<1>(vertex_edge_counts)) / - static_cast(thrust::get<0>(vertex_edge_counts)) - : double{0.0}; - auto num_streams = static_cast( - (avg_vertex_degree * sizeof(vertex_t)) / - (static_cast(sizeof(vertex_t) + sizeof(edge_t)) + - (((avg_vertex_degree / minor_comm_size) / num_chunks) * sizeof(vertex_t) * 2) + - (std::min(1.0, ((avg_vertex_degree / minor_comm_size) / num_chunks)) * - (sizeof(vertex_t) + sizeof(edge_t))))); - if (num_streams >= 2) { - stream_pool_indices = std::vector(num_streams); - std::iota((*stream_pool_indices).begin(), (*stream_pool_indices).end(), size_t{0}); - handle.sync_stream(); - } - } - for (int i = 0; i < minor_comm_size; ++i) { - auto loop_stream = stream_pool_indices - ? handle.get_stream_from_stream_pool(i % (*stream_pool_indices).size()) - : handle.get_stream(); - - rmm::device_uvector sorted_majors(edge_partition_major_range_sizes[i], loop_stream); + rmm::device_uvector sorted_majors(edge_partition_major_range_sizes[i], + handle.get_stream()); device_bcast(minor_comm, sorted_local_vertices.data(), sorted_majors.data(), edge_partition_major_range_sizes[i], i, - loop_stream); + handle.get_stream()); - rmm::device_uvector sorted_major_degrees(sorted_majors.size(), loop_stream); - thrust::fill(rmm::exec_policy(loop_stream), + rmm::device_uvector sorted_major_degrees(sorted_majors.size(), handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), sorted_major_degrees.begin(), sorted_major_degrees.end(), edge_t{0}); - rmm::device_uvector tmp_majors(0, loop_stream); - tmp_majors.reserve( - (static_cast(edgelist_edge_counts[i]) + (num_chunks - 1)) / num_chunks, - loop_stream); - size_t offset{0}; - for (size_t j = 0; j < num_chunks; ++j) { - size_t this_chunk_size = - std::min(tmp_majors.capacity(), static_cast(edgelist_edge_counts[i]) - offset); - tmp_majors.resize(this_chunk_size, loop_stream); - thrust::copy(rmm::exec_policy(loop_stream), - 
edgelist_majors[i] + offset, - edgelist_majors[i] + offset + tmp_majors.size(), - tmp_majors.begin()); - thrust::sort(rmm::exec_policy(loop_stream), tmp_majors.begin(), tmp_majors.end()); - auto num_unique_majors = - thrust::count_if(rmm::exec_policy(loop_stream), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(tmp_majors.size()), - is_first_in_run_t{tmp_majors.data()}); - rmm::device_uvector tmp_keys(num_unique_majors, loop_stream); - rmm::device_uvector tmp_values(num_unique_majors, loop_stream); - thrust::reduce_by_key(rmm::exec_policy(loop_stream), - tmp_majors.begin(), - tmp_majors.end(), - thrust::make_constant_iterator(edge_t{1}), - tmp_keys.begin(), - tmp_values.begin()); - - auto kv_pair_first = - thrust::make_zip_iterator(thrust::make_tuple(tmp_keys.begin(), tmp_values.begin())); - thrust::for_each(rmm::exec_policy(loop_stream), - kv_pair_first, - kv_pair_first + tmp_keys.size(), - search_and_increment_degree_t{ - sorted_majors.data(), - static_cast(sorted_majors.size()), - sorted_major_degrees.data()}); - offset += this_chunk_size; - } + thrust::for_each( + handle.get_thrust_policy(), + edgelist_majors[i], + edgelist_majors[i] + edgelist_edge_counts[i], + [sorted_majors = + raft::device_span(sorted_majors.data(), sorted_majors.size()), + sorted_major_degrees = raft::device_span( + sorted_major_degrees.data(), sorted_major_degrees.size())] __device__(auto major) { + auto it = + thrust::lower_bound(thrust::seq, sorted_majors.begin(), sorted_majors.end(), major); + assert((it != sorted_majors.end()) && (*it == major)); + cuda::atomic_ref atomic_counter( + sorted_major_degrees[thrust::distance(sorted_majors.begin(), it)]); + atomic_counter.fetch_add(edge_t{1}, cuda::std::memory_order_relaxed); + }); device_reduce(minor_comm, sorted_major_degrees.begin(), @@ -481,11 +589,9 @@ std::tuple, std::vector, vertex_t> compu edge_partition_major_range_sizes[i], raft::comms::op_t::SUM, i, - loop_stream); + handle.get_stream()); if (i == 
minor_comm_rank) { sorted_local_vertex_degrees = std::move(sorted_major_degrees); } } - - if (stream_pool_indices) { handle.sync_stream_pool(*stream_pool_indices); } } else { assert(edgelist_majors.size() == 1); @@ -495,47 +601,24 @@ std::tuple, std::vector, vertex_t> compu sorted_local_vertex_degrees.end(), edge_t{0}); - rmm::device_uvector tmp_majors(0, handle.get_stream()); - tmp_majors.reserve(static_cast(edgelist_edge_counts[0] + (num_chunks - 1)) / num_chunks, - handle.get_stream()); - size_t offset{0}; - for (size_t i = 0; i < num_chunks; ++i) { - size_t this_chunk_size = - std::min(tmp_majors.capacity(), static_cast(edgelist_edge_counts[0]) - offset); - tmp_majors.resize(this_chunk_size, handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - edgelist_majors[0] + offset, - edgelist_majors[0] + offset + tmp_majors.size(), - tmp_majors.begin()); - thrust::sort(handle.get_thrust_policy(), tmp_majors.begin(), tmp_majors.end()); - auto num_unique_majors = - thrust::count_if(handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(tmp_majors.size()), - is_first_in_run_t{tmp_majors.data()}); - rmm::device_uvector tmp_keys(num_unique_majors, handle.get_stream()); - rmm::device_uvector tmp_values(num_unique_majors, handle.get_stream()); - thrust::reduce_by_key(handle.get_thrust_policy(), - tmp_majors.begin(), - tmp_majors.end(), - thrust::make_constant_iterator(edge_t{1}), - tmp_keys.begin(), - tmp_values.begin()); - - auto kv_pair_first = - thrust::make_zip_iterator(thrust::make_tuple(tmp_keys.begin(), tmp_values.begin())); - thrust::for_each(handle.get_thrust_policy(), - kv_pair_first, - kv_pair_first + tmp_keys.size(), - search_and_increment_degree_t{ - sorted_local_vertices.data(), - static_cast(sorted_local_vertices.size()), - sorted_local_vertex_degrees.data()}); - offset += this_chunk_size; - } + thrust::for_each(handle.get_thrust_policy(), + edgelist_majors[0], + edgelist_majors[0] + 
edgelist_edge_counts[0], + [sorted_majors = raft::device_span( + sorted_local_vertices.data(), sorted_local_vertices.size()), + sorted_major_degrees = raft::device_span( + sorted_local_vertex_degrees.data(), + sorted_local_vertex_degrees.size())] __device__(auto major) { + auto it = thrust::lower_bound( + thrust::seq, sorted_majors.begin(), sorted_majors.end(), major); + assert((it != sorted_majors.end()) && (*it == major)); + cuda::atomic_ref atomic_counter( + sorted_major_degrees[thrust::distance(sorted_majors.begin(), it)]); + atomic_counter.fetch_add(edge_t{1}, cuda::std::memory_order_relaxed); + }); } - // 4. sort local vertices by degree (descending) + // 5. sort local vertices by degree (descending) thrust::sort_by_key(handle.get_thrust_policy(), sorted_local_vertex_degrees.begin(), @@ -543,7 +626,7 @@ std::tuple, std::vector, vertex_t> compu sorted_local_vertices.begin(), thrust::greater()); - // 5. compute segment_offsets + // 6. compute segment_offsets static_assert(detail::num_sparse_segments_per_vertex_partition == 3); static_assert((detail::low_degree_threshold <= detail::mid_degree_threshold) && @@ -553,57 +636,85 @@ std::tuple, std::vector, vertex_t> compu (detail::hypersparse_threshold_ratio <= 1.0)); size_t mid_degree_threshold{detail::mid_degree_threshold}; size_t low_degree_threshold{detail::low_degree_threshold}; - size_t hypersparse_degree_threshold{0}; + size_t hypersparse_degree_threshold{1}; if (multi_gpu) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); mid_degree_threshold *= minor_comm_size; low_degree_threshold *= minor_comm_size; - hypersparse_degree_threshold = - static_cast(minor_comm_size * detail::hypersparse_threshold_ratio); + hypersparse_degree_threshold = std::max( + static_cast(minor_comm_size * detail::hypersparse_threshold_ratio), size_t{1}); } - auto num_segments_per_vertex_partition = - detail::num_sparse_segments_per_vertex_partition 
+ - (hypersparse_degree_threshold > 0 ? size_t{2} : size_t{1}); // last is 0-degree segment - rmm::device_uvector d_thresholds(num_segments_per_vertex_partition - 1, - handle.get_stream()); - auto h_thresholds = - hypersparse_degree_threshold > 0 - ? std::vector{static_cast(mid_degree_threshold), - static_cast(low_degree_threshold), - static_cast(hypersparse_degree_threshold), - std::min(static_cast(hypersparse_degree_threshold), edge_t{1})} - : std::vector{static_cast(mid_degree_threshold), - static_cast(low_degree_threshold), - edge_t{1}}; - raft::update_device( - d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), handle.get_stream()); - - rmm::device_uvector d_segment_offsets(num_segments_per_vertex_partition + 1, - handle.get_stream()); - auto vertex_count = static_cast(sorted_local_vertices.size()); - d_segment_offsets.set_element_to_zero_async(0, handle.get_stream()); - d_segment_offsets.set_element( - num_segments_per_vertex_partition, vertex_count, handle.get_stream()); + std::vector h_segment_offsets{}; + std::optional> h_hypersparse_degree_offsets{}; + { + auto num_partitions = detail::num_sparse_segments_per_vertex_partition /* high, mid, low */ + + (hypersparse_degree_threshold > 1 + ? 
hypersparse_degree_threshold - size_t{1} + /* one partition per each global degree in the hypersparse region */ + : size_t{0}) + + size_t{1} /* zero */; + rmm::device_uvector d_thresholds(num_partitions - 1, handle.get_stream()); + thrust::tabulate(handle.get_thrust_policy(), + d_thresholds.begin(), + d_thresholds.end(), + [mid_degree_threshold, + low_degree_threshold, + hypersparse_degree_threshold] __device__(size_t i) { + if (i == 0) { + return mid_degree_threshold; // high,mid boundary + } else if (i == 1) { + return low_degree_threshold; // mid, low boundary + } else { + assert(hypersparse_degree_threshold > (i - 2)); + return hypersparse_degree_threshold - (i - 2); + } + }); + rmm::device_uvector d_offsets(num_partitions + 1, handle.get_stream()); + d_offsets.set_element_to_zero_async(0, handle.get_stream()); + auto vertex_count = static_cast(sorted_local_vertices.size()); + d_offsets.set_element(num_partitions, vertex_count, handle.get_stream()); + thrust::upper_bound(handle.get_thrust_policy(), + sorted_local_vertex_degrees.begin(), + sorted_local_vertex_degrees.end(), + d_thresholds.begin(), + d_thresholds.end(), + d_offsets.begin() + 1, + thrust::greater{}); + std::vector h_offsets(d_offsets.size()); + raft::update_host(h_offsets.data(), d_offsets.data(), d_offsets.size(), handle.get_stream()); + handle.sync_stream(); - thrust::upper_bound(handle.get_thrust_policy(), - sorted_local_vertex_degrees.begin(), - sorted_local_vertex_degrees.end(), - d_thresholds.begin(), - d_thresholds.end(), - d_segment_offsets.begin() + 1, - thrust::greater{}); - - std::vector h_segment_offsets(d_segment_offsets.size()); - raft::update_host(h_segment_offsets.data(), - d_segment_offsets.data(), - d_segment_offsets.size(), - handle.get_stream()); - handle.sync_stream(); + auto num_segments_per_vertex_partition = + detail::num_sparse_segments_per_vertex_partition + + (hypersparse_degree_threshold > 1 ? 
size_t{2} : size_t{1}); // last is 0-degree segment + h_segment_offsets.resize(num_segments_per_vertex_partition + 1); + std::copy(h_offsets.begin(), + h_offsets.begin() + num_sparse_segments_per_vertex_partition + 1, + h_segment_offsets.begin()); + *(h_segment_offsets.rbegin()) = *(h_offsets.rbegin()); + if (hypersparse_degree_threshold > 1) { + *(h_segment_offsets.rbegin() + 1) = *(h_offsets.rbegin() + 1); + + h_hypersparse_degree_offsets = std::vector(hypersparse_degree_threshold); + std::copy(h_offsets.begin() + num_sparse_segments_per_vertex_partition, + h_offsets.begin() + num_sparse_segments_per_vertex_partition + + (hypersparse_degree_threshold - 1), + (*h_hypersparse_degree_offsets).begin()); + auto shift = (*h_hypersparse_degree_offsets)[0]; + std::transform((*h_hypersparse_degree_offsets).begin(), + (*h_hypersparse_degree_offsets).end(), + (*h_hypersparse_degree_offsets).begin(), + [shift](auto offset) { return offset - shift; }); + *((*h_hypersparse_degree_offsets).rbegin()) = *(h_offsets.rbegin() + 1); + } + } - return std::make_tuple( - std::move(sorted_local_vertices), h_segment_offsets, *locally_unused_vertex_id); + return std::make_tuple(std::move(sorted_local_vertices), + h_segment_offsets, + h_hypersparse_degree_offsets, + *locally_unused_vertex_id); } template @@ -789,32 +900,28 @@ void expensive_check_edgelist( } template -std::vector aggregate_segment_offsets(raft::handle_t const& handle, - std::vector const& segment_offsets) +std::vector aggregate_offset_vectors(raft::handle_t const& handle, + std::vector const& offsets) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - rmm::device_uvector d_segment_offsets(segment_offsets.size(), handle.get_stream()); - raft::update_device( - d_segment_offsets.data(), segment_offsets.data(), segment_offsets.size(), handle.get_stream()); - rmm::device_uvector d_aggregate_segment_offsets( - minor_comm_size * 
d_segment_offsets.size(), handle.get_stream()); - minor_comm.allgather(d_segment_offsets.data(), - d_aggregate_segment_offsets.data(), - d_segment_offsets.size(), - handle.get_stream()); - - std::vector h_aggregate_segment_offsets(d_aggregate_segment_offsets.size(), - vertex_t{0}); - raft::update_host(h_aggregate_segment_offsets.data(), - d_aggregate_segment_offsets.data(), - d_aggregate_segment_offsets.size(), + rmm::device_uvector d_offsets(offsets.size(), handle.get_stream()); + raft::update_device(d_offsets.data(), offsets.data(), offsets.size(), handle.get_stream()); + rmm::device_uvector d_aggregate_offset_vectors(minor_comm_size * d_offsets.size(), + handle.get_stream()); + minor_comm.allgather( + d_offsets.data(), d_aggregate_offset_vectors.data(), d_offsets.size(), handle.get_stream()); + + std::vector h_aggregate_offset_vectors(d_aggregate_offset_vectors.size(), vertex_t{0}); + raft::update_host(h_aggregate_offset_vectors.data(), + d_aggregate_offset_vectors.data(), + d_aggregate_offset_vectors.size(), handle.get_stream()); handle.sync_stream(); // this is necessary as h_aggregate_offsets can be used right after return. 
- return h_aggregate_segment_offsets; + return h_aggregate_offset_vectors; } } // namespace detail @@ -857,10 +964,10 @@ renumber_edgelist( (*edgelist_intra_partition_segment_offsets).size() == static_cast(minor_comm_size), "Invalid input arguments: erroneous (*edgelist_intra_partition_segment_offsets).size()."); for (size_t i = 0; i < edgelist_majors.size(); ++i) { - CUGRAPH_EXPECTS( - (*edgelist_intra_partition_segment_offsets)[i].size() == - static_cast(major_comm_size + 1), - "Invalid input arguments: erroneous (*edgelist_intra_partition_segment_offsets)[].size()."); + CUGRAPH_EXPECTS((*edgelist_intra_partition_segment_offsets)[i].size() == + static_cast(major_comm_size + 1), + "Invalid input arguments: erroneous " + "(*edgelist_intra_partition_segment_offsets)[].size()."); CUGRAPH_EXPECTS( std::is_sorted((*edgelist_intra_partition_segment_offsets)[i].begin(), (*edgelist_intra_partition_segment_offsets)[i].end()), @@ -868,8 +975,8 @@ renumber_edgelist( CUGRAPH_EXPECTS( ((*edgelist_intra_partition_segment_offsets)[i][0] == 0) && ((*edgelist_intra_partition_segment_offsets)[i].back() == edgelist_edge_counts[i]), - "Invalid input arguments: (*edgelist_intra_partition_segment_offsets)[][0] should be 0 and " - "(*edgelist_intra_partition_segment_offsets)[].back() should coincide with " + "Invalid input arguments: (*edgelist_intra_partition_segment_offsets)[][0] should be 0 " + "and (*edgelist_intra_partition_segment_offsets)[].back() should coincide with " "edgelist_edge_counts[]."); } } @@ -893,7 +1000,10 @@ renumber_edgelist( // 1. 
compute renumber map - auto [renumber_map_labels, vertex_partition_segment_offsets, locally_unused_vertex_id] = + auto [renumber_map_labels, + vertex_partition_segment_offsets, + vertex_partition_hypersparse_degree_offsets, + locally_unused_vertex_id] = detail::compute_renumber_map(handle, std::move(local_vertices), edgelist_const_majors, @@ -966,11 +1076,16 @@ renumber_edgelist( } } - if ((static_cast(partition.local_edge_partition_minor_range_size() * - 2.5 /* tuning parameter */) >= - static_cast(number_of_edges / comm_size)) && - edgelist_intra_partition_segment_offsets) { // memory footprint dominated by the O(V/sqrt(P)) - // part than the O(E/P) part + double approx_mem_requirements = + static_cast(partition.local_edge_partition_minor_range_size()) * + (static_cast( + sizeof(vertex_t)) /* rmm::device_uvector renumber_map_minor_labels */ + + + static_cast(sizeof(vertex_t) * 2) * + 2.5 /* kv_store_t renumber_map, * 2.5 to consider load factor */); + if ((approx_mem_requirements > + static_cast(handle.get_device_properties().totalGlobalMem) * 0.05) && + edgelist_intra_partition_segment_offsets) { vertex_t max_segment_size{0}; for (int i = 0; i < major_comm_size; ++i) { auto minor_range_vertex_partition_id = @@ -1020,10 +1135,10 @@ renumber_edgelist( recvcounts[i] = partition.vertex_partition_range_size(minor_range_vertex_partition_id); } std::vector displacements(recvcounts.size(), 0); - std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); + std::exclusive_scan(recvcounts.begin(), recvcounts.end(), displacements.begin(), size_t{0}); device_allgatherv(major_comm, - renumber_map_labels.begin(), - renumber_map_minor_labels.begin(), + renumber_map_labels.data(), + renumber_map_minor_labels.data(), recvcounts, displacements, handle.get_stream()); @@ -1045,12 +1160,20 @@ renumber_edgelist( } auto edge_partition_segment_offsets = - detail::aggregate_segment_offsets(handle, vertex_partition_segment_offsets); + 
detail::aggregate_offset_vectors(handle, vertex_partition_segment_offsets); + auto edge_partition_hypersparse_degree_offsets = + vertex_partition_hypersparse_degree_offsets + ? std::make_optional( + detail::aggregate_offset_vectors(handle, *vertex_partition_hypersparse_degree_offsets)) + : std::nullopt; return std::make_tuple( std::move(renumber_map_labels), - renumber_meta_t{ - number_of_vertices, number_of_edges, partition, edge_partition_segment_offsets}); + renumber_meta_t{number_of_vertices, + number_of_edges, + partition, + edge_partition_segment_offsets, + edge_partition_hypersparse_degree_offsets}); } template @@ -1078,7 +1201,10 @@ renumber_edgelist(raft::handle_t const& handle, std::nullopt); } - auto [renumber_map_labels, segment_offsets, locally_unused_vertex_id] = + auto [renumber_map_labels, + segment_offsets, + hypersparse_degree_offsets, + locally_unused_vertex_id] = detail::compute_renumber_map( handle, std::move(vertices), @@ -1099,8 +1225,9 @@ renumber_edgelist(raft::handle_t const& handle, renumber_map_view.find( edgelist_minors, edgelist_minors + num_edgelist_edges, edgelist_minors, handle.get_stream()); - return std::make_tuple(std::move(renumber_map_labels), - renumber_meta_t{segment_offsets}); + return std::make_tuple( + std::move(renumber_map_labels), + renumber_meta_t{segment_offsets, hypersparse_degree_offsets}); } } // namespace cugraph diff --git a/cpp/src/structure/renumber_utils_impl.cuh b/cpp/src/structure/renumber_utils_impl.cuh index 3efa58d9632..69c7c556bd8 100644 --- a/cpp/src/structure/renumber_utils_impl.cuh +++ b/cpp/src/structure/renumber_utils_impl.cuh @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -363,7 +364,7 @@ void renumber_ext_vertices(raft::handle_t const& handle, } std::unique_ptr> renumber_map_ptr{nullptr}; - if (multi_gpu) { + if constexpr (multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto& major_comm = 
handle.get_subcomm(cugraph::partition_manager::major_comm_name()); @@ -402,11 +403,12 @@ void renumber_ext_vertices(raft::handle_t const& handle, rmm::device_uvector int_vertices_for_sorted_unique_ext_vertices(0, handle.get_stream()); auto [unique_ext_vertices, int_vertices_for_unique_ext_vertices] = - collect_values_for_unique_keys(handle, + collect_values_for_unique_keys(comm, local_renumber_map.view(), std::move(sorted_unique_ext_vertices), detail::compute_gpu_id_from_ext_vertex_t{ - comm_size, major_comm_size, minor_comm_size}); + comm_size, major_comm_size, minor_comm_size}, + handle.get_stream()); renumber_map_ptr = std::make_unique>( unique_ext_vertices.begin(), @@ -573,7 +575,6 @@ void unrenumber_int_vertices(raft::handle_t const& handle, auto local_int_vertex_first = vertex_partition_id == 0 ? vertex_t{0} : vertex_partition_range_lasts[vertex_partition_id - 1]; - auto local_int_vertex_last = vertex_partition_range_lasts[vertex_partition_id]; rmm::device_uvector sorted_unique_int_vertices(num_vertices, handle.get_stream()); sorted_unique_int_vertices.resize( @@ -595,16 +596,20 @@ void unrenumber_int_vertices(raft::handle_t const& handle, sorted_unique_int_vertices.end())), handle.get_stream()); - auto [unique_int_vertices, ext_vertices_for_unique_int_vertices] = - collect_values_for_unique_int_vertices(handle, - std::move(sorted_unique_int_vertices), - renumber_map_labels, - vertex_partition_range_lasts); + auto ext_vertices_for_sorted_unique_int_vertices = + collect_values_for_sorted_unique_int_vertices( + comm, + raft::device_span(sorted_unique_int_vertices.data(), + sorted_unique_int_vertices.size()), + renumber_map_labels, + vertex_partition_range_lasts, + local_int_vertex_first, + handle.get_stream()); kv_store_t renumber_map( - unique_int_vertices.begin(), - unique_int_vertices.begin() + unique_int_vertices.size(), - ext_vertices_for_unique_int_vertices.begin(), + sorted_unique_int_vertices.begin(), + sorted_unique_int_vertices.end(), + 
ext_vertices_for_sorted_unique_int_vertices.begin(), invalid_vertex_id::value, invalid_vertex_id::value, handle.get_stream()); diff --git a/cpp/src/traversal/bfs_impl.cuh b/cpp/src/traversal/bfs_impl.cuh index 8a18dedd2ab..ba40db1f085 100644 --- a/cpp/src/traversal/bfs_impl.cuh +++ b/cpp/src/traversal/bfs_impl.cuh @@ -16,8 +16,9 @@ #pragma once #include "prims/fill_edge_src_dst_property.cuh" +#include "prims/per_v_transform_reduce_if_incoming_outgoing_e.cuh" #include "prims/reduce_op.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" #include "prims/update_v_frontier.cuh" #include "prims/vertex_frontier.cuh" @@ -51,6 +52,24 @@ namespace cugraph { namespace { +template +struct direction_optimizing_info_t { + rmm::device_uvector + approx_out_degrees; // if graph_view.local_vertex_partition_segment_offsets().has_value() is + // true, holds approximate degrees only for the high and mid degree + // segments; otherwise, exact + rmm::device_uvector visited_bitmap; + std::optional> nzd_unvisited_vertices{ + std::nullopt}; // valid only during bottom-up iterations + std::optional num_nzd_unvisited_low_degree_vertices{ + std::nullopt}; // to decide between topdown vs bottomup, relevant only when + // graph_view.local_vertex_partition_segment_offsets().has_value() is true + std::optional num_nzd_unvisited_hypersparse_vertices{ + std::nullopt}; // to decide between topdown vs bottomup, relevant only when + // graph_view.local_vertex_partition_segment_offsets().has_value() && + // graph_view.use_dcs() are both true +}; + template struct topdown_e_op_t { detail::edge_partition_endpoint_property_device_view_t @@ -69,18 +88,25 @@ struct topdown_e_op_t { } }; -template +template struct bottomup_e_op_t { - detail::edge_partition_endpoint_property_device_view_t + __device__ vertex_t operator()( + vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + { + 
return dst; + } +}; + +template +struct bottomup_pred_op_t { + detail::edge_partition_endpoint_property_device_view_t prev_visited_flags{}; // visited in the previous iterations vertex_t dst_first{}; - __device__ thrust::optional operator()( + __device__ bool operator()( vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const { - auto dst_offset = dst - dst_first; - auto old = prev_visited_flags.get(dst_offset); - return old ? thrust::optional{dst} : thrust::nullopt; + return prev_visited_flags.get(dst - dst_first); } }; @@ -144,14 +170,27 @@ void bfs(raft::handle_t const& handle, if constexpr (GraphViewType::is_multi_gpu) { is_sorted = static_cast(host_scalar_allreduce(handle.get_comms(), static_cast(is_sorted), - raft::comms::op_t::SUM, + raft::comms::op_t::MIN, handle.get_stream())); } - CUGRAPH_EXPECTS( is_sorted, "Invalid input arguments: input sources should be sorted in the non-descending order."); + bool no_duplicates = (static_cast(thrust::count_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(n_sources), + is_first_in_run_t{sources})) == n_sources); + if constexpr (GraphViewType::is_multi_gpu) { + no_duplicates = static_cast(host_scalar_allreduce(handle.get_comms(), + static_cast(no_duplicates), + raft::comms::op_t::MIN, + handle.get_stream())); + } + CUGRAPH_EXPECTS(no_duplicates, + "Invalid input arguments: input sources should not have duplicates."); + auto num_invalid_vertices = thrust::count_if(handle.get_thrust_policy(), sources, @@ -189,34 +228,119 @@ void bfs(raft::handle_t const& handle, // 3. update meta data for direction optimizing BFS - constexpr edge_t direction_optimizing_alpha = 14; - constexpr vertex_t direction_optimizing_beta = 24; + auto segment_offsets = graph_view.local_vertex_partition_segment_offsets(); + + double direction_optimizing_alpha = + (graph_view.number_of_vertices() > 0) + ? 
((static_cast(graph_view.compute_number_of_edges(handle)) / + static_cast(graph_view.number_of_vertices())) * + (1.0 / 3.75) /* tuning parameter */) + : double{1.0}; + constexpr vertex_t direction_optimizing_beta = 24; // tuning parameter - std::optional> out_degrees{std::nullopt}; - std::optional> nzd_unvisited_vertices{std::nullopt}; + std::optional> aux_info{std::nullopt}; if (direction_optimizing) { - out_degrees = graph_view.compute_out_degrees(handle); - nzd_unvisited_vertices = rmm::device_uvector( - graph_view.local_vertex_partition_range_size(), handle.get_stream()); - (*nzd_unvisited_vertices) - .resize(thrust::distance( - (*nzd_unvisited_vertices).begin(), - thrust::copy_if( - handle.get_thrust_policy(), - thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()), - thrust::make_counting_iterator(graph_view.local_vertex_partition_range_last()), - (*nzd_unvisited_vertices).begin(), - [vertex_partition, - sources = raft::device_span(sources, n_sources), - out_degrees = raft::device_span( - (*out_degrees).data(), (*out_degrees).size())] __device__(vertex_t v) { - auto v_offset = - vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); - return (out_degrees[v_offset] > edge_t{0}) && - !thrust::binary_search(thrust::seq, sources.begin(), sources.end(), v); - })), - handle.get_stream()); - (*nzd_unvisited_vertices).shrink_to_fit(handle.get_stream()); + rmm::device_uvector approx_out_degrees(0, handle.get_stream()); + if (segment_offsets) { // exploit internal knowledge for exhaustive performance optimization for + // large-scale benchmarking (the else path is sufficient for small + // clusters with few tens of GPUs) + size_t partition_idx{0}; + size_t partition_size{1}; + if constexpr (GraphViewType::is_multi_gpu) { + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_rank = minor_comm.get_rank(); + auto const minor_comm_size = minor_comm.get_size(); + 
partition_idx = static_cast(minor_comm_rank); + partition_size = static_cast(minor_comm_size); + } + + auto edge_partition = + edge_partition_device_view_t( + graph_view.local_edge_partition_view(partition_idx)); + auto edge_mask_view = graph_view.edge_mask_view(); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, partition_idx) + : thrust::nullopt; + auto high_and_mid_degree_segment_size = + (*segment_offsets)[2]; // compute local degrees for high & mid degree segments only, for + // low & hypersparse segments, use low_degree_threshold * + // partition_size * 0.5 & partition_size * + // hypersparse_threshold_ratio * 0.5 as approximate out degrees + if (edge_partition_e_mask) { + approx_out_degrees = edge_partition.compute_local_degrees_with_mask( + (*edge_partition_e_mask).value_first(), + thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()), + thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()) + + high_and_mid_degree_segment_size, + handle.get_stream()); + } else { + approx_out_degrees = edge_partition.compute_local_degrees( + thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()), + thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()) + + high_and_mid_degree_segment_size, + handle.get_stream()); + } + thrust::transform(handle.get_thrust_policy(), + approx_out_degrees.begin(), + approx_out_degrees.end(), + approx_out_degrees.begin(), + multiplier_t{static_cast( + partition_size)}); // local_degrees => approximate global degrees + } else { + approx_out_degrees = graph_view.compute_out_degrees(handle); // exact + } + + rmm::device_uvector visited_bitmap( + packed_bool_size(graph_view.local_vertex_partition_range_size()), handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + visited_bitmap.begin(), + visited_bitmap.end(), + packed_bool_empty_mask()); 
+ thrust::for_each( + handle.get_thrust_policy(), + sources, + sources + n_sources, + [bitmap = raft::device_span(visited_bitmap.data(), visited_bitmap.size()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) { + auto v_offset = v - v_first; + cuda::atomic_ref word( + bitmap[packed_bool_offset(v_offset)]); + word.fetch_or(packed_bool_mask(v_offset), cuda::std::memory_order_relaxed); + }); + + std::optional num_nzd_unvisited_low_degree_vertices{std::nullopt}; + std::optional num_nzd_unvisited_hypersparse_vertices{std::nullopt}; + if (segment_offsets) { + num_nzd_unvisited_low_degree_vertices = (*segment_offsets)[3] - (*segment_offsets)[2]; + if (graph_view.use_dcs()) { + num_nzd_unvisited_hypersparse_vertices = (*segment_offsets)[4] - (*segment_offsets)[3]; + } + if (n_sources > 0) { + std::vector h_sources(n_sources); + raft::update_host(h_sources.data(), sources, n_sources, handle.get_stream()); + handle.sync_stream(); + for (size_t i = 0; i < h_sources.size(); ++i) { + auto v_offset = h_sources[i] - graph_view.local_vertex_partition_range_first(); + if ((v_offset >= (*segment_offsets)[2]) && (v_offset < (*segment_offsets)[3])) { + --(*num_nzd_unvisited_low_degree_vertices); + } else if (graph_view.use_dcs()) { + if ((v_offset >= (*segment_offsets)[3]) && (v_offset < (*segment_offsets)[4])) { + --(*num_nzd_unvisited_hypersparse_vertices); + } + } + } + } + } + + aux_info = + direction_optimizing_info_t{std::move(approx_out_degrees), + std::move(visited_bitmap), + std::nullopt, + num_nzd_unvisited_low_degree_vertices, + num_nzd_unvisited_hypersparse_vertices}; } // 4. 
initialize BFS frontier @@ -237,7 +361,6 @@ void bfs(raft::handle_t const& handle, handle, graph_view); // this may mark some vertices visited in previous iterations as unvisited // (but this is OK as we check prev_dst_visited_flags first) fill_edge_dst_property(handle, graph_view, dst_visited_flags.mutable_view(), false); - fill_edge_dst_property(handle, graph_view, vertex_frontier.bucket(bucket_idx_cur).begin(), @@ -247,12 +370,12 @@ void bfs(raft::handle_t const& handle, // 4. BFS iteration vertex_t depth{0}; - bool top_down = true; - auto cur_aggregate_vertex_frontier_size = + bool topdown = true; + auto cur_aggregate_frontier_size = static_cast(vertex_frontier.bucket(bucket_idx_cur).aggregate_size()); while (true) { - vertex_t next_aggregate_vertex_frontier_size{}; - if (top_down) { + vertex_t next_aggregate_frontier_size{}; + if (topdown) { topdown_e_op_t e_op{}; e_op.prev_visited_flags = detail::edge_partition_endpoint_property_device_view_t( @@ -263,14 +386,15 @@ void bfs(raft::handle_t const& handle, e_op.dst_first = graph_view.local_edge_partition_dst_range_first(); auto [new_frontier_vertex_buffer, predecessor_buffer] = - transform_reduce_v_frontier_outgoing_e_by_dst(handle, - graph_view, - vertex_frontier.bucket(bucket_idx_cur), - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_dummy_property_t{}.view(), - e_op, - reduce_op::any()); + cugraph::transform_reduce_v_frontier_outgoing_e_by_dst( + handle, + graph_view, + vertex_frontier.bucket(bucket_idx_cur), + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_dummy_property_t{}.view(), + e_op, + reduce_op::any()); auto input_pair_first = thrust::make_zip_iterator(thrust::make_constant_iterator(depth + 1), predecessor_buffer.begin()); @@ -286,9 +410,9 @@ void bfs(raft::handle_t const& handle, key_bucket_t( handle, std::move(new_frontier_vertex_buffer)); - next_aggregate_vertex_frontier_size = + next_aggregate_frontier_size = 
static_cast(vertex_frontier.bucket(bucket_idx_next).aggregate_size()); - if (next_aggregate_vertex_frontier_size == 0) { break; } + if (next_aggregate_frontier_size == 0) { break; } fill_edge_dst_property(handle, graph_view, @@ -298,65 +422,146 @@ void bfs(raft::handle_t const& handle, true); if (direction_optimizing) { - auto m_f = thrust::transform_reduce( - handle.get_thrust_policy(), - vertex_frontier.bucket(bucket_idx_next).begin(), - vertex_frontier.bucket(bucket_idx_next).end(), - cuda::proclaim_return_type( - [vertex_partition, - out_degrees = raft::device_span( - (*out_degrees).data(), (*out_degrees).size())] __device__(vertex_t v) { - auto v_offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); - return out_degrees[v_offset]; - }), - edge_t{0}, - thrust::plus{}); + if (vertex_frontier.bucket(bucket_idx_next).size() > 0) { + thrust::for_each( + handle.get_thrust_policy(), + vertex_frontier.bucket(bucket_idx_next).begin(), + vertex_frontier.bucket(bucket_idx_next).end(), + [bitmap = raft::device_span((*aux_info).visited_bitmap.data(), + (*aux_info).visited_bitmap.size()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) { + auto v_offset = v - v_first; + cuda::atomic_ref word( + bitmap[packed_bool_offset(v_offset)]); + word.fetch_or(packed_bool_mask(v_offset), cuda::std::memory_order_relaxed); + }); + } + double m_f{0.0}; + double m_u{0.0}; { - rmm::device_uvector tmp_vertices((*nzd_unvisited_vertices).size(), - handle.get_stream()); - tmp_vertices.resize( - thrust::distance(tmp_vertices.begin(), - thrust::set_difference(handle.get_thrust_policy(), - (*nzd_unvisited_vertices).begin(), - (*nzd_unvisited_vertices).end(), - vertex_frontier.bucket(bucket_idx_next).begin(), - vertex_frontier.bucket(bucket_idx_next).end(), - tmp_vertices.begin())), - handle.get_stream()); - nzd_unvisited_vertices = std::move(tmp_vertices); + size_t partition_size{1}; + if constexpr (GraphViewType::is_multi_gpu) { + auto& 
minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + partition_size = static_cast(minor_comm_size); + } + + auto f_vertex_first = vertex_frontier.bucket(bucket_idx_next).begin(); + auto f_vertex_last = vertex_frontier.bucket(bucket_idx_next).end(); + + if (segment_offsets) { + // FIXME: this actually over-estimates for graphs with power-law degree distribution + auto approx_low_segment_degree = + static_cast(low_degree_threshold * partition_size) * 0.5; + auto approx_hypersparse_segment_degree = + static_cast(partition_size) * hypersparse_threshold_ratio * 0.5; + auto f_segment_offsets = compute_key_segment_offsets( + vertex_frontier.bucket(bucket_idx_next).begin(), + vertex_frontier.bucket(bucket_idx_next).end(), + raft::host_span((*segment_offsets).data(), (*segment_offsets).size()), + graph_view.local_vertex_partition_range_first(), + handle.get_stream()); + *((*aux_info).num_nzd_unvisited_low_degree_vertices) -= + (f_segment_offsets[3] - f_segment_offsets[2]); + if (graph_view.use_dcs()) { + *((*aux_info).num_nzd_unvisited_hypersparse_vertices) -= + (f_segment_offsets[4] - f_segment_offsets[3]); + } + f_vertex_last = f_vertex_first + f_segment_offsets[2]; + m_f = static_cast((f_segment_offsets[3] - f_segment_offsets[2])) * + approx_low_segment_degree; + if (graph_view.use_dcs()) { + m_f += static_cast(f_segment_offsets[4] - f_segment_offsets[3]) * + approx_hypersparse_segment_degree; + } + + m_u = static_cast(*((*aux_info).num_nzd_unvisited_low_degree_vertices)) * + approx_low_segment_degree; + if (graph_view.use_dcs()) { + m_u += static_cast(*((*aux_info).num_nzd_unvisited_hypersparse_vertices)) * + approx_hypersparse_segment_degree; + } + } + + m_f += static_cast(thrust::transform_reduce( + handle.get_thrust_policy(), + f_vertex_first, + f_vertex_last, + cuda::proclaim_return_type( + [out_degrees = raft::device_span((*aux_info).approx_out_degrees.data(), + 
(*aux_info).approx_out_degrees.size()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) { + auto v_offset = v - v_first; + return out_degrees[v_offset]; + }), + edge_t{0}, + thrust::plus{})); + + m_u += static_cast(thrust::transform_reduce( + handle.get_thrust_policy(), + thrust::make_counting_iterator(vertex_t{0}), + thrust::make_counting_iterator(segment_offsets + ? (*segment_offsets)[2] + : graph_view.local_vertex_partition_range_size()), + cuda::proclaim_return_type( + [out_degrees = raft::device_span((*aux_info).approx_out_degrees.data(), + (*aux_info).approx_out_degrees.size()), + bitmap = raft::device_span( + (*aux_info).visited_bitmap.data(), + (*aux_info).visited_bitmap.size())] __device__(vertex_t v_offset) { + auto word = bitmap[packed_bool_offset(v_offset)]; + if ((word & packed_bool_mask(v_offset)) != packed_bool_empty_mask()) { // visited + return edge_t{0}; + } else { + return out_degrees[v_offset]; + } + }), + edge_t{0}, + thrust::plus{})); } - auto m_u = thrust::transform_reduce( - handle.get_thrust_policy(), - (*nzd_unvisited_vertices).begin(), - (*nzd_unvisited_vertices).end(), - cuda::proclaim_return_type( - [vertex_partition, - out_degrees = raft::device_span( - (*out_degrees).data(), (*out_degrees).size())] __device__(vertex_t v) { - auto v_offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); - return out_degrees[v_offset]; - }), - edge_t{0}, - thrust::plus{}); - auto aggregate_m_f = - GraphViewType::is_multi_gpu - ? host_scalar_allreduce( - handle.get_comms(), m_f, raft::comms::op_t::SUM, handle.get_stream()) - : m_f; - auto aggregate_m_u = - GraphViewType::is_multi_gpu - ? 
host_scalar_allreduce( - handle.get_comms(), m_u, raft::comms::op_t::SUM, handle.get_stream()) - : m_u; + auto aggregate_m_f = m_f; + auto aggregate_m_u = m_u; + if constexpr (GraphViewType::is_multi_gpu) { + auto tmp = host_scalar_allreduce(handle.get_comms(), + thrust::make_tuple(m_f, m_u), + raft::comms::op_t::SUM, + handle.get_stream()); + aggregate_m_f = thrust::get<0>(tmp); + aggregate_m_u = thrust::get<1>(tmp); + } if ((aggregate_m_f * direction_optimizing_alpha > aggregate_m_u) && - (next_aggregate_vertex_frontier_size >= cur_aggregate_vertex_frontier_size)) { - top_down = false; + (next_aggregate_frontier_size >= cur_aggregate_frontier_size)) { + topdown = false; + (*aux_info).nzd_unvisited_vertices = rmm::device_uvector( + segment_offsets ? *((*segment_offsets).rbegin() + 1) + : graph_view.local_vertex_partition_range_size(), + handle.get_stream()); + (*((*aux_info).nzd_unvisited_vertices)) + .resize( + thrust::distance( + (*((*aux_info).nzd_unvisited_vertices)).begin(), + thrust::copy_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()), + thrust::make_counting_iterator( + segment_offsets ? 
graph_view.local_vertex_partition_range_first() + + *((*segment_offsets).rbegin() + 1) + : graph_view.local_vertex_partition_range_last()), + (*((*aux_info).nzd_unvisited_vertices)).begin(), + [bitmap = raft::device_span((*aux_info).visited_bitmap.data(), + (*aux_info).visited_bitmap.size()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) { + auto v_offset = v - v_first; + auto word = bitmap[packed_bool_offset(v_offset)]; + return ((word & packed_bool_mask(v_offset)) == packed_bool_empty_mask()); + })), + handle.get_stream()); } } - if (top_down) { // staying in top-down + if (topdown) { // staying in top-down vertex_frontier.bucket(bucket_idx_cur) = key_bucket_t(handle); vertex_frontier.swap_buckets(bucket_idx_cur, bucket_idx_next); @@ -364,63 +569,122 @@ void bfs(raft::handle_t const& handle, vertex_frontier.bucket(bucket_idx_cur) = key_bucket_t( handle, - raft::device_span((*nzd_unvisited_vertices).data(), - (*nzd_unvisited_vertices).size())); + raft::device_span((*((*aux_info).nzd_unvisited_vertices)).data(), + (*((*aux_info).nzd_unvisited_vertices)).size())); vertex_frontier.bucket(bucket_idx_next) = key_bucket_t(handle); } } else { // bottom up - bottomup_e_op_t e_op{}; - e_op.prev_visited_flags = - detail::edge_partition_endpoint_property_device_view_t( - prev_dst_visited_flags.mutable_view()); - e_op.dst_first = graph_view.local_edge_partition_dst_range_first(); - auto [new_frontier_vertex_buffer, predecessor_buffer] = - transform_reduce_v_frontier_outgoing_e_by_src(handle, - graph_view, - vertex_frontier.bucket(bucket_idx_cur), - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_dummy_property_t{}.view(), - e_op, - reduce_op::any()); + rmm::device_uvector new_frontier_vertex_buffer(0, handle.get_stream()); + { + bottomup_e_op_t e_op{}; + bottomup_pred_op_t pred_op{}; + pred_op.prev_visited_flags = + detail::edge_partition_endpoint_property_device_view_t( + prev_dst_visited_flags.view()); + 
pred_op.dst_first = graph_view.local_edge_partition_dst_range_first(); + + rmm::device_uvector predecessor_buffer( + vertex_frontier.bucket(bucket_idx_cur).size(), handle.get_stream()); + per_v_transform_reduce_if_outgoing_e(handle, + graph_view, + vertex_frontier.bucket(bucket_idx_cur), + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_dummy_property_t{}.view(), + e_op, + invalid_vertex, + reduce_op::any(), + pred_op, + predecessor_buffer.begin(), + true); + auto input_pair_first = thrust::make_zip_iterator(thrust::make_constant_iterator(depth + 1), + predecessor_buffer.begin()); + + // FIXME: this scatter_if and the resize below can be concurrently executed. + thrust::scatter_if( + handle.get_thrust_policy(), + input_pair_first, + input_pair_first + predecessor_buffer.size(), + thrust::make_transform_iterator( + vertex_frontier.bucket(bucket_idx_cur).cbegin(), + detail::shift_left_t{graph_view.local_vertex_partition_range_first()}), + predecessor_buffer.begin(), + thrust::make_zip_iterator(distances, predecessor_first), + detail::is_not_equal_t{invalid_vertex}); + + new_frontier_vertex_buffer.resize(predecessor_buffer.size(), handle.get_stream()); + new_frontier_vertex_buffer.resize( + thrust::distance(new_frontier_vertex_buffer.begin(), + thrust::copy_if(handle.get_thrust_policy(), + vertex_frontier.bucket(bucket_idx_cur).cbegin(), + vertex_frontier.bucket(bucket_idx_cur).cend(), + predecessor_buffer.begin(), + new_frontier_vertex_buffer.begin(), + detail::is_not_equal_t{invalid_vertex})), + handle.get_stream()); - auto input_pair_first = thrust::make_zip_iterator(thrust::make_constant_iterator(depth + 1), - predecessor_buffer.begin()); - thrust::scatter( - handle.get_thrust_policy(), - input_pair_first, - input_pair_first + new_frontier_vertex_buffer.size(), - thrust::make_transform_iterator( + assert(direction_optimizing); + + thrust::for_each( + handle.get_thrust_policy(), new_frontier_vertex_buffer.begin(), - 
detail::shift_left_t{graph_view.local_vertex_partition_range_first()}), - thrust::make_zip_iterator(distances, predecessor_first)); + new_frontier_vertex_buffer.end(), + [bitmap = raft::device_span((*aux_info).visited_bitmap.data(), + (*aux_info).visited_bitmap.size()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) { + auto v_offset = v - v_first; + cuda::atomic_ref word( + bitmap[packed_bool_offset(v_offset)]); + word.fetch_or(packed_bool_mask(v_offset), cuda::std::memory_order_relaxed); + }); + (*((*aux_info).nzd_unvisited_vertices)) + .resize( + thrust::distance( + (*((*aux_info).nzd_unvisited_vertices)).begin(), + thrust::remove_if( + handle.get_thrust_policy(), + (*((*aux_info).nzd_unvisited_vertices)).begin(), + (*((*aux_info).nzd_unvisited_vertices)).end(), + [bitmap = raft::device_span((*aux_info).visited_bitmap.data(), + (*aux_info).visited_bitmap.size()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) { + auto v_offset = v - v_first; + auto word = bitmap[packed_bool_offset(v_offset)]; + return ((word & packed_bool_mask(v_offset)) != packed_bool_empty_mask()); + })), + handle.get_stream()); - assert(direction_optimizing); + if (segment_offsets) { + auto key_segment_offsets = compute_key_segment_offsets( + new_frontier_vertex_buffer.begin(), + new_frontier_vertex_buffer.end(), + raft::host_span((*segment_offsets).data(), (*segment_offsets).size()), + graph_view.local_vertex_partition_range_first(), + handle.get_stream()); + *((*aux_info).num_nzd_unvisited_low_degree_vertices) -= + key_segment_offsets[3] - key_segment_offsets[2]; + if (graph_view.use_dcs()) { + *((*aux_info).num_nzd_unvisited_hypersparse_vertices) -= + key_segment_offsets[4] - key_segment_offsets[3]; + } + } + } - { - rmm::device_uvector tmp_vertices((*nzd_unvisited_vertices).size(), - handle.get_stream()); - tmp_vertices.resize( - thrust::distance(tmp_vertices.begin(), - thrust::set_difference(handle.get_thrust_policy(), - 
(*nzd_unvisited_vertices).begin(), - (*nzd_unvisited_vertices).end(), - new_frontier_vertex_buffer.begin(), - new_frontier_vertex_buffer.end(), - tmp_vertices.begin())), + next_aggregate_frontier_size = static_cast(new_frontier_vertex_buffer.size()); + auto aggregate_nzd_unvisited_vertices = + static_cast((*((*aux_info).nzd_unvisited_vertices)).size()); + if constexpr (GraphViewType::is_multi_gpu) { + auto tmp = host_scalar_allreduce( + handle.get_comms(), + thrust::make_tuple(next_aggregate_frontier_size, aggregate_nzd_unvisited_vertices), + raft::comms::op_t::SUM, handle.get_stream()); - nzd_unvisited_vertices = std::move(tmp_vertices); + next_aggregate_frontier_size = thrust::get<0>(tmp); + aggregate_nzd_unvisited_vertices = thrust::get<1>(tmp); } - next_aggregate_vertex_frontier_size = - GraphViewType::is_multi_gpu - ? host_scalar_allreduce(handle.get_comms(), - static_cast(new_frontier_vertex_buffer.size()), - raft::comms::op_t::SUM, - handle.get_stream()) - : static_cast(new_frontier_vertex_buffer.size()); - if (next_aggregate_vertex_frontier_size == 0) { break; } + if (next_aggregate_frontier_size == 0) { break; } fill_edge_dst_property(handle, graph_view, @@ -429,21 +693,13 @@ void bfs(raft::handle_t const& handle, prev_dst_visited_flags.mutable_view(), true); - auto aggregate_nzd_unvisted_vertices = - GraphViewType::is_multi_gpu - ? 
host_scalar_allreduce(handle.get_comms(), - static_cast((*nzd_unvisited_vertices).size()), - raft::comms::op_t::SUM, - handle.get_stream()) - : static_cast((*nzd_unvisited_vertices).size()); - - if ((next_aggregate_vertex_frontier_size * direction_optimizing_beta < - aggregate_nzd_unvisted_vertices) && - (next_aggregate_vertex_frontier_size < cur_aggregate_vertex_frontier_size)) { - top_down = true; + if ((next_aggregate_frontier_size * direction_optimizing_beta < + aggregate_nzd_unvisited_vertices) && + (next_aggregate_frontier_size < cur_aggregate_frontier_size)) { + topdown = true; } - if (top_down) { // swithcing to top-down + if (topdown) { // swithcing to top-down vertex_frontier.bucket(bucket_idx_cur) = key_bucket_t( handle, std::move(new_frontier_vertex_buffer)); @@ -451,11 +707,11 @@ void bfs(raft::handle_t const& handle, vertex_frontier.bucket(bucket_idx_cur) = key_bucket_t( handle, - raft::device_span((*nzd_unvisited_vertices).data(), - (*nzd_unvisited_vertices).size())); + raft::device_span((*((*aux_info).nzd_unvisited_vertices)).data(), + ((*(*aux_info).nzd_unvisited_vertices)).size())); } } - cur_aggregate_vertex_frontier_size = next_aggregate_vertex_frontier_size; + cur_aggregate_frontier_size = next_aggregate_frontier_size; depth++; if (depth >= depth_limit) { break; } diff --git a/cpp/src/traversal/extract_bfs_paths_impl.cuh b/cpp/src/traversal/extract_bfs_paths_impl.cuh index 40030e2e39c..d228460bec3 100644 --- a/cpp/src/traversal/extract_bfs_paths_impl.cuh +++ b/cpp/src/traversal/extract_bfs_paths_impl.cuh @@ -220,11 +220,15 @@ std::tuple, vertex_t> extract_bfs_paths( detail::decrement_position{}); if constexpr (multi_gpu) { - current_frontier = collect_values_for_int_vertices(handle, - current_frontier.begin(), - current_frontier.end(), - predecessors, - h_vertex_partition_range_lasts); + auto& comm = handle.get_comms(); + current_frontier = + collect_values_for_int_vertices(comm, + current_frontier.begin(), + current_frontier.end(), + 
predecessors, + h_vertex_partition_range_lasts, + graph_view.local_vertex_partition_range_first(), + handle.get_stream()); } else { thrust::transform(handle.get_thrust_policy(), current_frontier.begin(), diff --git a/cpp/src/traversal/k_hop_nbrs_impl.cuh b/cpp/src/traversal/k_hop_nbrs_impl.cuh index acf3cfe8fc5..44fa21a5252 100644 --- a/cpp/src/traversal/k_hop_nbrs_impl.cuh +++ b/cpp/src/traversal/k_hop_nbrs_impl.cuh @@ -16,7 +16,7 @@ #pragma once #include "prims/reduce_op.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" #include "prims/vertex_frontier.cuh" #include @@ -147,15 +147,15 @@ k_hop_nbrs(raft::handle_t const& handle, rmm::device_uvector nbrs(0, handle.get_stream()); for (size_t iter = 0; iter < k; ++iter) { auto new_frontier_key_buffer = - transform_reduce_v_frontier_outgoing_e_by_dst(handle, - push_graph_view, - frontier.bucket(bucket_idx_cur), - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_dummy_property_t{}.view(), - e_op_t{}, - reduce_op::null{}, - do_expensive_check); + cugraph::transform_reduce_v_frontier_outgoing_e_by_dst(handle, + push_graph_view, + frontier.bucket(bucket_idx_cur), + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_dummy_property_t{}.view(), + e_op_t{}, + reduce_op::null{}, + do_expensive_check); if (iter < (k - 1)) { frontier.bucket(bucket_idx_cur).clear(); frontier.bucket(bucket_idx_cur) diff --git a/cpp/src/traversal/od_shortest_distances_impl.cuh b/cpp/src/traversal/od_shortest_distances_impl.cuh index e1b7444b92f..b3cd0d57c67 100644 --- a/cpp/src/traversal/od_shortest_distances_impl.cuh +++ b/cpp/src/traversal/od_shortest_distances_impl.cuh @@ -22,7 +22,7 @@ #include "prims/kv_store.cuh" #include "prims/reduce_op.cuh" #include "prims/transform_reduce_e.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" +#include 
"prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" #include "prims/update_edge_src_dst_property.cuh" #include "prims/update_v_frontier.cuh" #include "prims/vertex_frontier.cuh" @@ -641,7 +641,6 @@ rmm::device_uvector od_shortest_distances( cutoff, invalid_distance}; detail::transform_reduce_v_frontier_call_e_op_t< - false, thrust::tuple, weight_t, vertex_t, @@ -653,8 +652,8 @@ rmm::device_uvector od_shortest_distances( auto new_frontier_tagged_vertex_buffer = allocate_dataframe_buffer>(0, handle.get_stream()); - std::tie(new_frontier_tagged_vertex_buffer, distance_buffer) = detail:: - extract_transform_v_frontier_e, weight_t>( + std::tie(new_frontier_tagged_vertex_buffer, distance_buffer) = + detail::extract_transform_v_frontier_e, weight_t>( handle, graph_view, vertex_frontier.bucket(bucket_idx_near), @@ -675,12 +674,14 @@ rmm::device_uvector od_shortest_distances( resize_dataframe_buffer(new_frontier_tagged_vertex_buffer, 0, handle.get_stream()); shrink_to_fit_dataframe_buffer(new_frontier_tagged_vertex_buffer, handle.get_stream()); - std::tie(new_frontier_keys, distance_buffer) = - detail::sort_and_reduce_buffer_elements>( + std::tie(new_frontier_keys, distance_buffer) = detail:: + sort_and_reduce_buffer_elements>( handle, std::move(new_frontier_keys), std::move(distance_buffer), - reduce_op::minimum()); + reduce_op::minimum(), + std::make_tuple(vertex_t{0}, graph_view.number_of_vertices()), + std::nullopt); } vertex_frontier.bucket(bucket_idx_near).clear(); diff --git a/cpp/src/traversal/sssp_impl.cuh b/cpp/src/traversal/sssp_impl.cuh index 47908524feb..3429672b151 100644 --- a/cpp/src/traversal/sssp_impl.cuh +++ b/cpp/src/traversal/sssp_impl.cuh @@ -19,7 +19,7 @@ #include "prims/fill_edge_src_dst_property.cuh" #include "prims/reduce_op.cuh" #include "prims/transform_reduce_e.cuh" -#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" #include 
"prims/update_edge_src_dst_property.cuh" #include "prims/update_v_frontier.cuh" #include "prims/vertex_frontier.cuh" @@ -197,7 +197,7 @@ void sssp(raft::handle_t const& handle, push_graph_view.local_vertex_partition_view()); auto [new_frontier_vertex_buffer, distance_predecessor_buffer] = - transform_reduce_v_frontier_outgoing_e_by_dst( + cugraph::transform_reduce_v_frontier_outgoing_e_by_dst( handle, push_graph_view, vertex_frontier.bucket(bucket_idx_cur_near), diff --git a/cpp/src/utilities/collect_comm.cuh b/cpp/src/utilities/collect_comm.cuh index 2197409fe26..dc4267aac57 100644 --- a/cpp/src/utilities/collect_comm.cuh +++ b/cpp/src/utilities/collect_comm.cuh @@ -50,79 +50,73 @@ namespace cugraph { -// for the keys in kv_store_view, key_to_gpu_id_op(key) should coincide with comm.get_rank() -template -decltype(allocate_dataframe_buffer(0, - rmm::cuda_stream_view{})) -collect_values_for_keys(raft::handle_t const& handle, - KVStoreViewType kv_store_view, - KeyIterator collect_key_first, - KeyIterator collect_key_last, - KeyToGPUIdOp key_to_gpu_id_op) +// for the keys in kv_store_view, key_to_comm_rank_op(key) should coincide with comm.get_rank() +template +dataframe_buffer_type_t collect_values_for_keys( + raft::comms::comms_t const& comm, + KVStoreViewType kv_store_view, + KeyIterator collect_key_first, + KeyIterator collect_key_last, + KeyToCommRankOp key_to_comm_rank_op, + rmm::cuda_stream_view stream_view) { using key_t = typename KVStoreViewType::key_type; static_assert(std::is_same_v::value_type, key_t>); using value_t = typename KVStoreViewType::value_type; - auto& comm = handle.get_comms(); - // 1. 
collect values for the unique keys in [collect_key_first, collect_key_last) rmm::device_uvector unique_keys(thrust::distance(collect_key_first, collect_key_last), - handle.get_stream()); + stream_view); thrust::copy( - handle.get_thrust_policy(), collect_key_first, collect_key_last, unique_keys.begin()); - thrust::sort(handle.get_thrust_policy(), unique_keys.begin(), unique_keys.end()); + rmm::exec_policy_nosync(stream_view), collect_key_first, collect_key_last, unique_keys.begin()); + thrust::sort(rmm::exec_policy_nosync(stream_view), unique_keys.begin(), unique_keys.end()); unique_keys.resize( thrust::distance( unique_keys.begin(), - thrust::unique(handle.get_thrust_policy(), unique_keys.begin(), unique_keys.end())), - handle.get_stream()); + thrust::unique(rmm::exec_policy(stream_view), unique_keys.begin(), unique_keys.end())), + stream_view); - auto values_for_unique_keys = allocate_dataframe_buffer(0, handle.get_stream()); + auto values_for_unique_keys = allocate_dataframe_buffer(0, stream_view); { - rmm::device_uvector rx_unique_keys(0, handle.get_stream()); + rmm::device_uvector rx_unique_keys(0, stream_view); std::vector rx_value_counts{}; std::tie(rx_unique_keys, rx_value_counts) = groupby_gpu_id_and_shuffle_values( comm, unique_keys.begin(), unique_keys.end(), - [key_to_gpu_id_op] __device__(auto val) { return key_to_gpu_id_op(val); }, - handle.get_stream()); + [key_to_comm_rank_op] __device__(auto val) { return key_to_comm_rank_op(val); }, + stream_view); auto values_for_rx_unique_keys = - allocate_dataframe_buffer(rx_unique_keys.size(), handle.get_stream()); + allocate_dataframe_buffer(rx_unique_keys.size(), stream_view); kv_store_view.find(rx_unique_keys.begin(), rx_unique_keys.end(), get_dataframe_buffer_begin(values_for_rx_unique_keys), - handle.get_stream()); + stream_view); - auto rx_values_for_unique_keys = allocate_dataframe_buffer(0, handle.get_stream()); - std::tie(rx_values_for_unique_keys, std::ignore) = - shuffle_values(comm, - 
get_dataframe_buffer_begin(values_for_rx_unique_keys), - rx_value_counts, - handle.get_stream()); + auto rx_values_for_unique_keys = allocate_dataframe_buffer(0, stream_view); + std::tie(rx_values_for_unique_keys, std::ignore) = shuffle_values( + comm, get_dataframe_buffer_begin(values_for_rx_unique_keys), rx_value_counts, stream_view); values_for_unique_keys = std::move(rx_values_for_unique_keys); } // 2. build a kv_store_t object for the k, v pairs in unique_keys, values_for_unique_keys. - kv_store_t unique_key_value_store( - handle.get_stream()); + kv_store_t unique_key_value_store(stream_view); if constexpr (KVStoreViewType::binary_search) { unique_key_value_store = kv_store_t(std::move(unique_keys), std::move(values_for_unique_keys), kv_store_view.invalid_value(), false, - handle.get_stream()); + stream_view); } else { auto kv_pair_first = thrust::make_zip_iterator( thrust::make_tuple(unique_keys.begin(), get_dataframe_buffer_begin(values_for_unique_keys))); auto valid_kv_pair_last = - thrust::remove_if(handle.get_thrust_policy(), + thrust::remove_if(rmm::exec_policy(stream_view), kv_pair_first, kv_pair_first + unique_keys.size(), [invalid_value = kv_store_view.invalid_value()] __device__(auto pair) { @@ -136,176 +130,173 @@ collect_values_for_keys(raft::handle_t const& handle, get_dataframe_buffer_begin(values_for_unique_keys), kv_store_view.invalid_key(), kv_store_view.invalid_value(), - handle.get_stream()); + stream_view); - unique_keys.resize(0, handle.get_stream()); - resize_dataframe_buffer(values_for_unique_keys, 0, handle.get_stream()); - unique_keys.shrink_to_fit(handle.get_stream()); - shrink_to_fit_dataframe_buffer(values_for_unique_keys, handle.get_stream()); + unique_keys.resize(0, stream_view); + resize_dataframe_buffer(values_for_unique_keys, 0, stream_view); + unique_keys.shrink_to_fit(stream_view); + shrink_to_fit_dataframe_buffer(values_for_unique_keys, stream_view); } auto unique_key_value_store_view = unique_key_value_store.view(); // 3. 
find values for [collect_key_first, collect_key_last) auto value_buffer = allocate_dataframe_buffer( - thrust::distance(collect_key_first, collect_key_last), handle.get_stream()); - unique_key_value_store_view.find(collect_key_first, - collect_key_last, - get_dataframe_buffer_begin(value_buffer), - handle.get_stream()); + thrust::distance(collect_key_first, collect_key_last), stream_view); + unique_key_value_store_view.find( + collect_key_first, collect_key_last, get_dataframe_buffer_begin(value_buffer), stream_view); return value_buffer; } -// for the keys in kv_store_view, key_to_gpu_id_op(key) should coincide with comm.get_rank() -template +// for the keys in kv_store_view, key_to_comm_rank_op(key) should coincide with comm.get_rank() +template std::tuple, - decltype(allocate_dataframe_buffer( - 0, cudaStream_t{nullptr}))> + dataframe_buffer_type_t> collect_values_for_unique_keys( - raft::handle_t const& handle, + raft::comms::comms_t const& comm, KVStoreViewType kv_store_view, rmm::device_uvector&& collect_unique_keys, - KeyToGPUIdOp key_to_gpu_id_op) + KeyToCommRankOp key_to_comm_rank_op, + rmm::cuda_stream_view stream_view) { using key_t = typename KVStoreViewType::key_type; using value_t = typename KVStoreViewType::value_type; - auto& comm = handle.get_comms(); - - auto values_for_collect_unique_keys = allocate_dataframe_buffer(0, handle.get_stream()); + auto values_for_collect_unique_keys = allocate_dataframe_buffer(0, stream_view); { auto [rx_unique_keys, rx_value_counts] = groupby_gpu_id_and_shuffle_values( comm, collect_unique_keys.begin(), collect_unique_keys.end(), - [key_to_gpu_id_op] __device__(auto val) { return key_to_gpu_id_op(val); }, - handle.get_stream()); + [key_to_comm_rank_op] __device__(auto val) { return key_to_comm_rank_op(val); }, + stream_view); auto values_for_rx_unique_keys = - allocate_dataframe_buffer(rx_unique_keys.size(), handle.get_stream()); + allocate_dataframe_buffer(rx_unique_keys.size(), stream_view); 
kv_store_view.find(rx_unique_keys.begin(), rx_unique_keys.end(), get_dataframe_buffer_begin(values_for_rx_unique_keys), - handle.get_stream()); + stream_view); - std::tie(values_for_collect_unique_keys, std::ignore) = - shuffle_values(comm, - get_dataframe_buffer_begin(values_for_rx_unique_keys), - rx_value_counts, - handle.get_stream()); + std::tie(values_for_collect_unique_keys, std::ignore) = shuffle_values( + comm, get_dataframe_buffer_begin(values_for_rx_unique_keys), rx_value_counts, stream_view); } return std::make_tuple(std::move(collect_unique_keys), std::move(values_for_collect_unique_keys)); } template -std::tuple< - rmm::device_uvector, - decltype(allocate_dataframe_buffer::value_type>( - 0, cudaStream_t{nullptr}))> -collect_values_for_unique_int_vertices(raft::handle_t const& handle, - rmm::device_uvector&& collect_unique_int_vertices, - ValueIterator local_value_first, - std::vector const& vertex_partition_range_lasts) +dataframe_buffer_type_t::value_type> +collect_values_for_sorted_unique_int_vertices( + raft::comms::comms_t const& comm, + raft::device_span collect_sorted_unique_int_vertices, + ValueIterator local_value_first, + std::vector const& comm_rank_vertex_partition_range_lasts, + vertex_t local_vertex_partition_range_first, + rmm::cuda_stream_view stream_view) { using value_t = typename thrust::iterator_traits::value_type; - auto& comm = handle.get_comms(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto const major_comm_rank = major_comm.get_rank(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - auto const minor_comm_rank = minor_comm.get_rank(); + // 1.find tx_counts - // 1. 
groupby and shuffle internal vertices + rmm::device_uvector d_range_lasts(comm_rank_vertex_partition_range_lasts.size(), + stream_view); + raft::update_device(d_range_lasts.data(), + comm_rank_vertex_partition_range_lasts.data(), + comm_rank_vertex_partition_range_lasts.size(), + stream_view); - rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), - handle.get_stream()); - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); + rmm::device_uvector d_offsets(d_range_lasts.size() - 1, stream_view); + thrust::lower_bound(rmm::exec_policy_nosync(stream_view), + collect_sorted_unique_int_vertices.begin(), + collect_sorted_unique_int_vertices.end(), + d_range_lasts.begin(), + d_range_lasts.begin() + (d_range_lasts.size() - 1), + d_offsets.begin()); - auto [rx_int_vertices, rx_int_vertex_counts] = groupby_gpu_id_and_shuffle_values( - comm, - collect_unique_int_vertices.begin(), - collect_unique_int_vertices.end(), - detail::compute_gpu_id_from_int_vertex_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - major_comm_size, - minor_comm_size}, - handle.get_stream()); - - // 2: Lookup return values - - auto vertex_partition_id = - partition_manager::compute_vertex_partition_id_from_graph_subcomm_ranks( - major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank); - auto local_int_vertex_first = - vertex_partition_id == 0 ? 
vertex_t{0} : vertex_partition_range_lasts[vertex_partition_id - 1]; - - auto value_buffer = - allocate_dataframe_buffer(rx_int_vertices.size(), handle.get_stream()); - thrust::transform(handle.get_thrust_policy(), + std::vector h_offsets(d_offsets.size() + 2); + raft::update_host(h_offsets.data() + 1, d_offsets.data(), d_offsets.size(), stream_view); + h_offsets[0] = 0; + h_offsets.back() = collect_sorted_unique_int_vertices.size(); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view)); + + std::vector tx_counts(comm_rank_vertex_partition_range_lasts.size()); + std::adjacent_difference(h_offsets.begin() + 1, h_offsets.end(), tx_counts.begin()); + + // 2. shuffle sorted unique internal vertices to the owning ranks + + auto [rx_int_vertices, rx_counts] = + shuffle_values(comm, collect_sorted_unique_int_vertices.begin(), tx_counts, stream_view); + + // 3.Lookup return values + + auto value_buffer = allocate_dataframe_buffer(rx_int_vertices.size(), stream_view); + thrust::transform(rmm::exec_policy_nosync(stream_view), rx_int_vertices.begin(), rx_int_vertices.end(), get_dataframe_buffer_begin(value_buffer), - [local_value_first, local_int_vertex_first] __device__(auto v) { - return local_value_first[v - local_int_vertex_first]; + [local_value_first, local_vertex_partition_range_first] __device__(auto v) { + return local_value_first[v - local_vertex_partition_range_first]; }); + rx_int_vertices.resize(0, stream_view); + rx_int_vertices.shrink_to_fit(stream_view); - // 3: Shuffle results back to original GPU + // 4. 
Shuffle results back to the original ranks - std::tie(value_buffer, std::ignore) = shuffle_values( - comm, get_dataframe_buffer_begin(value_buffer), rx_int_vertex_counts, handle.get_stream()); + std::tie(value_buffer, std::ignore) = + shuffle_values(comm, get_dataframe_buffer_begin(value_buffer), rx_counts, stream_view); - return std::make_tuple(std::move(collect_unique_int_vertices), std::move(value_buffer)); + return value_buffer; } template -decltype(allocate_dataframe_buffer::value_type>( - 0, cudaStream_t{nullptr})) +dataframe_buffer_type_t::value_type> collect_values_for_int_vertices( - raft::handle_t const& handle, + raft::comms::comms_t const& comm, VertexIterator collect_vertex_first, VertexIterator collect_vertex_last, ValueIterator local_value_first, std::vector::value_type> const& - vertex_partition_range_lasts) + comm_rank_vertex_partition_range_lasts, + typename thrust::iterator_traits::value_type local_vertex_partition_range_first, + rmm::cuda_stream_view stream_view) { using vertex_t = typename thrust::iterator_traits::value_type; using value_t = typename thrust::iterator_traits::value_type; size_t input_size = thrust::distance(collect_vertex_first, collect_vertex_last); - rmm::device_uvector sorted_unique_int_vertices(input_size, handle.get_stream()); + rmm::device_uvector sorted_unique_int_vertices(input_size, stream_view); - raft::copy( - sorted_unique_int_vertices.data(), collect_vertex_first, input_size, handle.get_stream()); + raft::copy(sorted_unique_int_vertices.data(), collect_vertex_first, input_size, stream_view); - thrust::sort(handle.get_thrust_policy(), + thrust::sort(rmm::exec_policy_nosync(stream_view), sorted_unique_int_vertices.begin(), sorted_unique_int_vertices.end()); - auto last = thrust::unique(handle.get_thrust_policy(), + auto last = thrust::unique(rmm::exec_policy(stream_view), sorted_unique_int_vertices.begin(), sorted_unique_int_vertices.end()); 
sorted_unique_int_vertices.resize(thrust::distance(sorted_unique_int_vertices.begin(), last), - handle.get_stream()); - - auto [unique_int_vertices, tmp_value_buffer] = collect_values_for_unique_int_vertices( - handle, std::move(sorted_unique_int_vertices), local_value_first, vertex_partition_range_lasts); + stream_view); - kv_store_t kv_map(std::move(unique_int_vertices), + auto tmp_value_buffer = collect_values_for_sorted_unique_int_vertices( + comm, + raft::device_span(sorted_unique_int_vertices.data(), + sorted_unique_int_vertices.size()), + local_value_first, + comm_rank_vertex_partition_range_lasts, + local_vertex_partition_range_first, + stream_view); + + kv_store_t kv_map(std::move(sorted_unique_int_vertices), std::move(tmp_value_buffer), invalid_vertex_id::value, false, - handle.get_stream()); + stream_view); auto device_view = detail::kv_binary_search_store_device_view_t(kv_map.view()); - auto value_buffer = allocate_dataframe_buffer(input_size, handle.get_stream()); - thrust::transform(handle.get_thrust_policy(), + auto value_buffer = allocate_dataframe_buffer(input_size, stream_view); + thrust::transform(rmm::exec_policy_nosync(stream_view), collect_vertex_first, collect_vertex_last, get_dataframe_buffer_begin(value_buffer), diff --git a/cpp/src/utilities/cugraph_ops_utils.hpp b/cpp/src/utilities/cugraph_ops_utils.hpp deleted file mode 100644 index bb014b9d7d6..00000000000 --- a/cpp/src/utilities/cugraph_ops_utils.hpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace cugraph { -namespace detail { - -template -ops::graph::csc get_graph( - graph_view_t const& gview) -{ - ops::graph::csc graph; - graph.n_src_nodes = gview.number_of_vertices(); - graph.n_dst_nodes = gview.number_of_vertices(); - graph.n_indices = gview.local_edge_partition_view().number_of_edges(); - // FIXME this is sufficient for now, but if there is a fast (cached) way - // of getting max degree, use that instead - graph.dst_max_in_degree = std::numeric_limits::max(); - // FIXME: this is evil and is just temporary until we have a matching type in cugraph-ops - // or we change the type accepted by the functions calling into cugraph-ops - graph.offsets = const_cast(gview.local_edge_partition_view().offsets().data()); - graph.indices = const_cast(gview.local_edge_partition_view().indices().data()); - return graph; -} - -} // namespace detail -} // namespace cugraph diff --git a/cpp/src/utilities/shuffle_vertex_pairs.cuh b/cpp/src/utilities/shuffle_vertex_pairs.cuh index 70327db5ffb..1cf2493cd28 100644 --- a/cpp/src/utilities/shuffle_vertex_pairs.cuh +++ b/cpp/src/utilities/shuffle_vertex_pairs.cuh @@ -61,10 +61,10 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( (edge_ids ? sizeof(edge_t) : size_t{0}) + (edge_types ? 
sizeof(edge_type_t) : size_t{0}); auto constexpr mem_frugal_ratio = - 0.1; // if the expected temporary buffer size exceeds the mem_frugal_ratio of the - // total_global_mem, switch to the memory frugal approach (thrust::sort is used to - // group-by by default, and thrust::sort requires temporary buffer comparable to the input - // data size) + 0.05; // if the expected temporary buffer size exceeds the mem_frugal_ratio of the + // total_global_mem, switch to the memory frugal approach (thrust::sort is used to + // group-by by default, and thrust::sort requires temporary buffer comparable to the + // input data size) auto mem_frugal_threshold = static_cast(static_cast(total_global_mem / element_size) * mem_frugal_ratio); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a2eeafea8cf..44963f91515 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -698,9 +698,9 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureTestMG(MG_COUNT_IF_V_TEST prims/mg_count_if_v.cu) ############################################################################################### - # - MG PRIMS TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_SRC_DST tests -------------------------- - ConfigureTestMG(MG_TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_SRC_DST_TEST - prims/mg_transform_reduce_v_frontier_outgoing_e_by_src_dst.cu) + # - MG PRIMS TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_DST tests ------------------------------ + ConfigureTestMG(MG_TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_DST_TEST + prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu) ############################################################################################### # - MG PRIMS REDUCE_V tests ------------------------------------------------------------------- diff --git a/cpp/tests/c_api/mg_test_utils.h b/cpp/tests/c_api/mg_test_utils.h index b040d8dc529..12ca16bfe35 100644 --- a/cpp/tests/c_api/mg_test_utils.h +++ b/cpp/tests/c_api/mg_test_utils.h @@ -36,6 +36,15 @@ } \ } while (0) +#define 
C_NCCL_TRY(call) \ + do { \ + ncclResult_t status = call; \ + if (ncclSuccess != status) { \ + printf("NCCL call='%s' at file=%s line=%d failed.", #call, __FILE__, __LINE__); \ + exit(1); \ + } \ + } while (0) + #define C_CUDA_TRY(call) \ do { \ cudaError_t const status = call; \ diff --git a/cpp/tests/community/balanced_edge_test.cpp b/cpp/tests/community/balanced_edge_test.cpp index 614a4ee4190..c4488dc9b9e 100644 --- a/cpp/tests/community/balanced_edge_test.cpp +++ b/cpp/tests/community/balanced_edge_test.cpp @@ -15,7 +15,7 @@ #include #include -TEST(balanced_edge, DISABLED_success) +TEST(balanced_edge, success) { std::vector off_h = {0, 16, 25, 35, 41, 44, 48, 52, 56, 61, 63, 66, 67, 69, 74, 76, 78, 80, 82, 84, 87, 89, 91, 93, diff --git a/cpp/tests/link_analysis/mg_hits_test.cpp b/cpp/tests/link_analysis/mg_hits_test.cpp index eb2a9bcd721..83e76472260 100644 --- a/cpp/tests/link_analysis/mg_hits_test.cpp +++ b/cpp/tests/link_analysis/mg_hits_test.cpp @@ -91,7 +91,7 @@ class Tests_MGHits : public ::testing::TestWithParam d_mg_hubs(mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); diff --git a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_src_dst.cu b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu similarity index 74% rename from cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_src_dst.cu rename to cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu index 9b7e24856fe..085077017b3 100644 --- a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_src_dst.cu +++ b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "prims/transform_reduce_v_frontier_outgoing_e_by_src_dst.cuh" +#include "prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh" #include "prims/vertex_frontier.cuh" #include "utilities/base_fixture.hpp" #include "utilities/conversion_utilities.hpp" @@ -203,48 +203,7 @@ class Tests_MGTransformReduceVFrontierOutgoingEBySrcDst if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); - hr_timer.start("MG transform_reduce_v_frontier_outgoing_e_by_src"); - } - - auto mg_reduce_by_src_new_frontier_key_buffer = - cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); - [[maybe_unused]] auto mg_reduce_by_src_payload_buffer = - cugraph::detail::allocate_optional_dataframe_buffer(0, handle_->get_stream()); - - if constexpr (std::is_same_v) { - mg_reduce_by_src_new_frontier_key_buffer = - cugraph::transform_reduce_v_frontier_outgoing_e_by_src( - *handle_, - mg_graph_view, - mg_vertex_frontier.bucket(bucket_idx_cur), - mg_src_prop.view(), - mg_dst_prop.view(), - cugraph::edge_dummy_property_t{}.view(), - e_op_t{}, - cugraph::reduce_op::null{}); - } else { - std::tie(mg_reduce_by_src_new_frontier_key_buffer, mg_reduce_by_src_payload_buffer) = - cugraph::transform_reduce_v_frontier_outgoing_e_by_src( - *handle_, - mg_graph_view, - mg_vertex_frontier.bucket(bucket_idx_cur), - mg_src_prop.view(), - mg_dst_prop.view(), - cugraph::edge_dummy_property_t{}.view(), - e_op_t{}, - cugraph::reduce_op::plus{}); - } - - if (cugraph::test::g_perf) { - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - handle_->get_comms().barrier(); - hr_timer.stop(); - hr_timer.display_and_clear(std::cout); - } - if (cugraph::test::g_perf) { - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - handle_->get_comms().barrier(); - hr_timer.start("MG transform_reduce_v_frontier_outgoing_e_by_src"); + hr_timer.start("MG 
transform_reduce_v_frontier_outgoing_e_by_dst"); } auto mg_reduce_by_dst_new_frontier_key_buffer = @@ -286,64 +245,27 @@ class Tests_MGTransformReduceVFrontierOutgoingEBySrcDst // 3. compare SG & MG results if (prims_usecase.check_correctness) { - if constexpr (std::is_same_v) { - cugraph::unrenumber_int_vertices( - *handle_, - mg_reduce_by_src_new_frontier_key_buffer.begin(), - mg_reduce_by_src_new_frontier_key_buffer.size(), - (*mg_renumber_map).data(), - mg_graph_view.vertex_partition_range_lasts()); - - cugraph::unrenumber_int_vertices( - *handle_, - mg_reduce_by_dst_new_frontier_key_buffer.begin(), - mg_reduce_by_dst_new_frontier_key_buffer.size(), - (*mg_renumber_map).data(), - mg_graph_view.vertex_partition_range_lasts()); - } else { - cugraph::unrenumber_int_vertices( - *handle_, - std::get<0>(mg_reduce_by_src_new_frontier_key_buffer).begin(), - std::get<0>(mg_reduce_by_src_new_frontier_key_buffer).size(), - (*mg_renumber_map).data(), - mg_graph_view.vertex_partition_range_lasts()); - - cugraph::unrenumber_int_vertices( - *handle_, - std::get<0>(mg_reduce_by_dst_new_frontier_key_buffer).begin(), - std::get<0>(mg_reduce_by_dst_new_frontier_key_buffer).size(), - (*mg_renumber_map).data(), - mg_graph_view.vertex_partition_range_lasts()); - } - - auto mg_reduce_by_src_aggregate_new_frontier_key_buffer = - cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); - if constexpr (std::is_same_v) { - mg_reduce_by_src_aggregate_new_frontier_key_buffer = - cugraph::test::device_gatherv(*handle_, - mg_reduce_by_src_new_frontier_key_buffer.data(), - mg_reduce_by_src_new_frontier_key_buffer.size()); - } else { - std::get<0>(mg_reduce_by_src_aggregate_new_frontier_key_buffer) = - cugraph::test::device_gatherv( - *handle_, - std::get<0>(mg_reduce_by_src_new_frontier_key_buffer).data(), - std::get<0>(mg_reduce_by_src_new_frontier_key_buffer).size()); - std::get<1>(mg_reduce_by_src_aggregate_new_frontier_key_buffer) = - cugraph::test::device_gatherv( - *handle_, - 
std::get<1>(mg_reduce_by_src_new_frontier_key_buffer).data(), - std::get<1>(mg_reduce_by_src_new_frontier_key_buffer).size()); - } - auto mg_reduce_by_dst_aggregate_new_frontier_key_buffer = cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); if constexpr (std::is_same_v) { + cugraph::unrenumber_local_int_vertices(*handle_, + mg_reduce_by_dst_new_frontier_key_buffer.data(), + mg_reduce_by_dst_new_frontier_key_buffer.size(), + (*mg_renumber_map).data(), + mg_graph_view.local_vertex_partition_range_first(), + mg_graph_view.local_vertex_partition_range_last()); mg_reduce_by_dst_aggregate_new_frontier_key_buffer = cugraph::test::device_gatherv(*handle_, mg_reduce_by_dst_new_frontier_key_buffer.data(), mg_reduce_by_dst_new_frontier_key_buffer.size()); } else { + cugraph::unrenumber_local_int_vertices( + *handle_, + std::get<0>(mg_reduce_by_dst_new_frontier_key_buffer).data(), + std::get<0>(mg_reduce_by_dst_new_frontier_key_buffer).size(), + (*mg_renumber_map).data(), + mg_graph_view.local_vertex_partition_range_first(), + mg_graph_view.local_vertex_partition_range_last()); std::get<0>(mg_reduce_by_dst_aggregate_new_frontier_key_buffer) = cugraph::test::device_gatherv( *handle_, @@ -356,26 +278,6 @@ class Tests_MGTransformReduceVFrontierOutgoingEBySrcDst std::get<1>(mg_reduce_by_dst_new_frontier_key_buffer).size()); } - [[maybe_unused]] auto mg_reduce_by_src_aggregate_payload_buffer = - cugraph::detail::allocate_optional_dataframe_buffer(0, handle_->get_stream()); - if constexpr (!std::is_same_v) { - if constexpr (std::is_arithmetic_v) { - mg_reduce_by_src_aggregate_payload_buffer = - cugraph::test::device_gatherv(*handle_, - mg_reduce_by_src_payload_buffer.data(), - mg_reduce_by_src_payload_buffer.size()); - } else { - std::get<0>(mg_reduce_by_src_aggregate_payload_buffer) = - cugraph::test::device_gatherv(*handle_, - std::get<0>(mg_reduce_by_src_payload_buffer).data(), - std::get<0>(mg_reduce_by_src_payload_buffer).size()); - 
std::get<1>(mg_reduce_by_src_aggregate_payload_buffer) = - cugraph::test::device_gatherv(*handle_, - std::get<1>(mg_reduce_by_src_payload_buffer).data(), - std::get<1>(mg_reduce_by_src_payload_buffer).size()); - } - } - [[maybe_unused]] auto mg_reduce_by_dst_aggregate_payload_buffer = cugraph::detail::allocate_optional_dataframe_buffer(0, handle_->get_stream()); if constexpr (!std::is_same_v) { @@ -409,22 +311,11 @@ class Tests_MGTransformReduceVFrontierOutgoingEBySrcDst if (handle_->get_comms().get_rank() == int{0}) { if constexpr (std::is_same_v) { - thrust::sort( - handle_->get_thrust_policy(), - cugraph::get_dataframe_buffer_begin(mg_reduce_by_src_aggregate_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_end(mg_reduce_by_src_aggregate_new_frontier_key_buffer)); - thrust::sort( handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(mg_reduce_by_dst_aggregate_new_frontier_key_buffer), cugraph::get_dataframe_buffer_end(mg_reduce_by_dst_aggregate_new_frontier_key_buffer)); } else { - thrust::sort_by_key( - handle_->get_thrust_policy(), - cugraph::get_dataframe_buffer_begin(mg_reduce_by_src_aggregate_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_end(mg_reduce_by_src_aggregate_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_begin(mg_reduce_by_src_aggregate_payload_buffer)); - thrust::sort_by_key( handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(mg_reduce_by_dst_aggregate_new_frontier_key_buffer), @@ -471,34 +362,6 @@ class Tests_MGTransformReduceVFrontierOutgoingEBySrcDst .insert(cugraph::get_dataframe_buffer_begin(sg_key_buffer), cugraph::get_dataframe_buffer_end(sg_key_buffer)); - auto sg_reduce_by_src_new_frontier_key_buffer = - cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); - [[maybe_unused]] auto sg_reduce_by_src_payload_buffer = - cugraph::detail::allocate_optional_dataframe_buffer(0, handle_->get_stream()); - if constexpr (std::is_same_v) { - sg_reduce_by_src_new_frontier_key_buffer = - 
cugraph::transform_reduce_v_frontier_outgoing_e_by_src( - *handle_, - sg_graph_view, - sg_vertex_frontier.bucket(bucket_idx_cur), - sg_src_prop.view(), - sg_dst_prop.view(), - cugraph::edge_dummy_property_t{}.view(), - e_op_t{}, - cugraph::reduce_op::null{}); - } else { - std::tie(sg_reduce_by_src_new_frontier_key_buffer, sg_reduce_by_src_payload_buffer) = - cugraph::transform_reduce_v_frontier_outgoing_e_by_src( - *handle_, - sg_graph_view, - sg_vertex_frontier.bucket(bucket_idx_cur), - sg_src_prop.view(), - sg_dst_prop.view(), - cugraph::edge_dummy_property_t{}.view(), - e_op_t{}, - cugraph::reduce_op::plus{}); - } - auto sg_reduce_by_dst_new_frontier_key_buffer = cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); [[maybe_unused]] auto sg_reduce_by_dst_payload_buffer = @@ -528,22 +391,11 @@ class Tests_MGTransformReduceVFrontierOutgoingEBySrcDst } if constexpr (std::is_same_v) { - thrust::sort( - handle_->get_thrust_policy(), - cugraph::get_dataframe_buffer_begin(sg_reduce_by_src_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_end(sg_reduce_by_src_new_frontier_key_buffer)); - thrust::sort( handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(sg_reduce_by_dst_new_frontier_key_buffer), cugraph::get_dataframe_buffer_end(sg_reduce_by_dst_new_frontier_key_buffer)); } else { - thrust::sort_by_key( - handle_->get_thrust_policy(), - cugraph::get_dataframe_buffer_begin(sg_reduce_by_src_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_end(sg_reduce_by_src_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_begin(sg_reduce_by_src_payload_buffer)); - thrust::sort_by_key( handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(sg_reduce_by_dst_new_frontier_key_buffer), @@ -551,14 +403,7 @@ class Tests_MGTransformReduceVFrontierOutgoingEBySrcDst cugraph::get_dataframe_buffer_begin(sg_reduce_by_dst_payload_buffer)); } - bool key_passed = thrust::equal( - handle_->get_thrust_policy(), - 
cugraph::get_dataframe_buffer_begin(sg_reduce_by_src_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_end(sg_reduce_by_src_new_frontier_key_buffer), - cugraph::get_dataframe_buffer_begin(mg_reduce_by_src_aggregate_new_frontier_key_buffer)); - ASSERT_TRUE(key_passed); - - key_passed = thrust::equal( + auto key_passed = thrust::equal( handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(sg_reduce_by_dst_new_frontier_key_buffer), cugraph::get_dataframe_buffer_end(sg_reduce_by_dst_new_frontier_key_buffer), @@ -567,13 +412,6 @@ class Tests_MGTransformReduceVFrontierOutgoingEBySrcDst if constexpr (!std::is_same_v) { bool payload_passed = thrust::equal( - handle_->get_thrust_policy(), - cugraph::get_dataframe_buffer_begin(sg_reduce_by_src_payload_buffer), - cugraph::get_dataframe_buffer_begin(sg_reduce_by_src_payload_buffer), - cugraph::get_dataframe_buffer_end(mg_reduce_by_src_aggregate_payload_buffer)); - ASSERT_TRUE(payload_passed); - - payload_passed = thrust::equal( handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(sg_reduce_by_dst_payload_buffer), cugraph::get_dataframe_buffer_begin(sg_reduce_by_dst_payload_buffer), diff --git a/cpp/tests/traversal/mg_bfs_test.cpp b/cpp/tests/traversal/mg_bfs_test.cpp index 4d4b83e275b..3cd712798e3 100644 --- a/cpp/tests/traversal/mg_bfs_test.cpp +++ b/cpp/tests/traversal/mg_bfs_test.cpp @@ -100,16 +100,6 @@ class Tests_MGBFS : public ::testing::TestWithParam initialize_mg_handle(size_t pool_size = 64); +std::unique_ptr initialize_mg_handle( + size_t pool_size = 8 /* default value of CUDA_DEVICE_MAX_CONNECTIONS */); // NCCL lazily initializes for P2P, and this enforces P2P initialization for better performance // measurements diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp index 0a706d1cf80..ed96ba23917 100644 --- a/cpp/tests/utilities/test_graphs.hpp +++ b/cpp/tests/utilities/test_graphs.hpp @@ -24,7 +24,6 @@ #include #include #include -#include // legacy 
coo_to_csr #include @@ -234,7 +233,8 @@ class Rmat_Usecase : public detail::TranslateGraph_Usecase { construct_edgelist(raft::handle_t const& handle, bool test_weighted, bool store_transposed, - bool multi_gpu) const + bool multi_gpu, + bool shuffle = true) const { CUGRAPH_EXPECTS( (size_t{1} << scale_) <= static_cast(std::numeric_limits::max()), @@ -246,7 +246,7 @@ class Rmat_Usecase : public detail::TranslateGraph_Usecase { // cuMemAddressReserve // (https://developer.nvidia.com/blog/introducing-low-level-gpu-virtual-memory-management), we // can reduce the temporary memory requirement to (1 / num_partitions) * (original data size) - size_t constexpr num_partitions_per_gpu = 4; + size_t constexpr num_partitions_per_gpu = 8; size_t num_partitions = num_partitions_per_gpu * static_cast(multi_gpu ? handle.get_comms().get_size() : 1); @@ -330,7 +330,7 @@ class Rmat_Usecase : public detail::TranslateGraph_Usecase { handle, std::move(tmp_src_v), std::move(tmp_dst_v), std::move(tmp_weights_v)); } - if (multi_gpu) { + if (multi_gpu && shuffle) { std::tie(store_transposed ? tmp_dst_v : tmp_src_v, store_transposed ? 
tmp_src_v : tmp_dst_v, tmp_weights_v, @@ -375,7 +375,7 @@ class Rmat_Usecase : public detail::TranslateGraph_Usecase { translate(handle, vertex_v); - if (multi_gpu) { + if (multi_gpu && shuffle) { vertex_v = cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( handle, std::move(vertex_v)); } @@ -391,6 +391,8 @@ class Rmat_Usecase : public detail::TranslateGraph_Usecase { void set_edge_factor(size_t edge_factor) { edge_factor_ = edge_factor; } + bool undirected() const { return undirected_; } + private: size_t scale_{}; size_t edge_factor_{}; @@ -762,39 +764,5 @@ construct_graph(raft::handle_t const& handle, return std::make_tuple(std::move(graph), std::move(edge_weights), std::move(renumber_map)); } -namespace legacy { - -template -std::unique_ptr> construct_graph_csr( - raft::handle_t const& handle, input_usecase_t const& input_usecase, bool test_weighted) -{ - auto [d_src_v, d_dst_v, d_weight_v, d_vertices_v, is_symmetric] = - input_usecase.template construct_edgelist( - handle, test_weighted, false, false); - vertex_t num_vertices{}; // assuming that vertex IDs are non-negative consecutive integers - if (d_vertices_v) { - num_vertices = - max_element( - handle, raft::device_span((*d_vertices_v).data(), (*d_vertices_v).size())) + - 1; - } else { - num_vertices = - std::max( - max_element(handle, raft::device_span(d_src_v.data(), d_src_v.size())), - max_element(handle, raft::device_span(d_dst_v.data(), d_dst_v.size()))) + - 1; - } - - cugraph::legacy::GraphCOOView cooview( - d_src_v.data(), - d_dst_v.data(), - d_weight_v ? 
d_weight_v->data() : nullptr, - num_vertices, - static_cast(d_src_v.size())); - - return cugraph::coo_to_csr(cooview); -} - -} // namespace legacy } // namespace test } // namespace cugraph diff --git a/dependencies.yaml b/dependencies.yaml index f959f4089f5..93983a1a29b 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -20,14 +20,10 @@ files: - depends_on_dask_cudf - depends_on_pylibraft - depends_on_raft_dask - # Deprecate pylibcugraphops - - depends_on_pylibcugraphops - depends_on_pylibwholegraph - depends_on_cupy - depends_on_pytorch - python_run_cugraph - - python_run_cugraph_dgl - - python_run_cugraph_pyg - test_notebook - test_python_common - test_python_cugraph @@ -43,8 +39,6 @@ files: - cuda_version - docs - py_version - # Deprecate pylibcugraphops - - depends_on_pylibcugraphops test_cpp: output: none includes: @@ -148,60 +142,6 @@ files: - depends_on_cudf - test_python_common - test_python_pylibcugraph - py_build_cugraph_dgl: - output: pyproject - pyproject_dir: python/cugraph-dgl - extras: - table: build-system - includes: - - python_build_rapids - - python_build_wheel - py_run_cugraph_dgl: - output: pyproject - pyproject_dir: python/cugraph-dgl - extras: - table: project - includes: - - python_run_cugraph_dgl - # Deprecate pylibcugraphops - - depends_on_pylibcugraphops - py_test_cugraph_dgl: - output: pyproject - pyproject_dir: python/cugraph-dgl - extras: - table: project.optional-dependencies - key: test - includes: - - test_python_common - - depends_on_pylibwholegraph - - depends_on_pytorch - py_build_cugraph_pyg: - output: pyproject - pyproject_dir: python/cugraph-pyg - extras: - table: build-system - includes: - - python_build_rapids - - python_build_wheel - py_run_cugraph_pyg: - output: pyproject - pyproject_dir: python/cugraph-pyg - extras: - table: project - includes: - - python_run_cugraph_pyg - # Deprecate pylibcugraphops - - depends_on_pylibcugraphops - py_test_cugraph_pyg: - output: pyproject - pyproject_dir: python/cugraph-pyg - 
extras: - table: project.optional-dependencies - key: test - includes: - - test_python_common - - depends_on_pylibwholegraph - - depends_on_pytorch py_build_cugraph_service_client: output: pyproject pyproject_dir: python/cugraph-service/client @@ -245,33 +185,10 @@ files: includes: - test_python_common - test_python_cugraph - cugraph_dgl_dev: - matrix: - cuda: ["11.8"] - output: conda - conda_dir: python/cugraph-dgl/conda - includes: - - checks - # Deprecate pylibcugraphops - - depends_on_pylibcugraphops - - cugraph_dgl_dev - - test_python_common - cugraph_pyg_dev: - matrix: - cuda: ["11.8"] - output: conda - conda_dir: python/cugraph-pyg/conda - includes: - - checks - # Deprecate pylibcugraphops - - depends_on_pylibcugraphops - - cugraph_pyg_dev - - test_python_common channels: - rapidsai - rapidsai-nightly - dask/label/dev - - dglteam/label/th23_cu118 - conda-forge - nvidia dependencies: @@ -373,12 +290,10 @@ dependencies: packages: - c-compiler - cxx-compiler - - libcudf==24.12.*,>=0.0.0a0 - # Deprecate libcugraphops - - libcugraphops==24.12.*,>=0.0.0a0 - - libraft-headers==24.12.*,>=0.0.0a0 - - libraft==24.12.*,>=0.0.0a0 - - librmm==24.12.*,>=0.0.0a0 + - libcudf==25.2.*,>=0.0.0a0 + - libraft-headers==25.2.*,>=0.0.0a0 + - libraft==25.2.*,>=0.0.0a0 + - librmm==25.2.*,>=0.0.0a0 - openmpi # Required for building cpp-mgtests (multi-GPU tests) specific: - output_types: [conda] @@ -468,8 +383,8 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - &dask rapids-dask-dependency==24.12.*,>=0.0.0a0 - - &dask_cuda dask-cuda==24.12.*,>=0.0.0a0 + - &dask rapids-dask-dependency==25.2.*,>=0.0.0a0 + - &dask_cuda dask-cuda==25.2.*,>=0.0.0a0 - &numba numba>=0.57 - &numpy numpy>=1.23,<3.0a0 - output_types: conda @@ -479,7 +394,7 @@ dependencies: - requests - nccl>=2.19 - ucx-proc=*=gpu - - &ucx_py_unsuffixed ucx-py==0.41.*,>=0.0.0a0 + - &ucx_py_unsuffixed ucx-py==0.42.*,>=0.0.0a0 - output_types: pyproject packages: # cudf uses fsspec but is protocol 
independent. cugraph @@ -492,59 +407,15 @@ dependencies: cuda: "11.*" cuda_suffixed: "true" packages: - - &ucx_py_cu11 ucx-py-cu11==0.41.*,>=0.0.0a0 + - &ucx_py_cu11 ucx-py-cu11==0.42.*,>=0.0.0a0 - matrix: cuda: "12.*" cuda_suffixed: "true" packages: - - &ucx_py_cu12 ucx-py-cu12==0.41.*,>=0.0.0a0 + - &ucx_py_cu12 ucx-py-cu12==0.42.*,>=0.0.0a0 - matrix: packages: - *ucx_py_unsuffixed - python_run_cugraph_dgl: - common: - - output_types: [conda, pyproject] - packages: - - *numba - - *numpy - specific: - - output_types: [pyproject] - matrices: - - matrix: - cuda: "11.*" - cuda_suffixed: "true" - packages: - - &cugraph_cu11 cugraph-cu11==24.12.*,>=0.0.0a0 - - matrix: - cuda: "12.*" - cuda_suffixed: "true" - packages: - - &cugraph_cu12 cugraph-cu12==24.12.*,>=0.0.0a0 - - matrix: - packages: - - &cugraph_unsuffixed cugraph==24.12.*,>=0.0.0a0 - python_run_cugraph_pyg: - common: - - output_types: [conda, pyproject] - packages: - - *numba - - *numpy - specific: - - output_types: [pyproject] - matrices: - - matrix: - cuda: "11.*" - cuda_suffixed: "true" - packages: - - *cugraph_cu11 - - matrix: - cuda: "12.*" - cuda_suffixed: "true" - packages: - - *cugraph_cu12 - - matrix: - packages: - - *cugraph_unsuffixed python_run_cugraph_service_client: common: - output_types: [conda, pyproject] @@ -569,20 +440,20 @@ dependencies: cuda: "11.*" cuda_suffixed: "true" packages: - - *cugraph_cu11 - - cugraph-service-client-cu11==24.12.*,>=0.0.0a0 + - &cugraph_cu11 cugraph-cu11==25.2.*,>=0.0.0a0 + - cugraph-service-client-cu11==25.2.*,>=0.0.0a0 - *ucx_py_cu11 - matrix: cuda: "12.*" cuda_suffixed: "true" packages: - - *cugraph_cu12 - - cugraph-service-client-cu12==24.12.*,>=0.0.0a0 + - &cugraph_cu12 cugraph-cu12==25.2.*,>=0.0.0a0 + - cugraph-service-client-cu12==25.2.*,>=0.0.0a0 - *ucx_py_cu12 - matrix: packages: - - *cugraph_unsuffixed - - cugraph-service-client==24.12.*,>=0.0.0a0 + - &cugraph_unsuffixed cugraph==25.2.*,>=0.0.0a0 + - cugraph-service-client==25.2.*,>=0.0.0a0 - 
*ucx_py_unsuffixed test_cpp: common: @@ -618,37 +489,19 @@ dependencies: - scikit-learn>=0.23.1 - output_types: [conda] packages: - - &pylibwholegraph_unsuffixed pylibwholegraph==24.12.*,>=0.0.0a0 + - &pylibwholegraph_unsuffixed pylibwholegraph==25.2.*,>=0.0.0a0 - *thrift test_python_pylibcugraph: common: - output_types: [conda, pyproject] packages: - *numpy - cugraph_dgl_dev: - common: - - output_types: [conda] - packages: - - *cugraph_unsuffixed - - &pytorch_conda pytorch>=2.3 - - pytorch-cuda==11.8 - - &tensordict tensordict>=0.1.2 - - dgl>=2.4.0.cu* - cugraph_pyg_dev: - common: - - output_types: [conda] - packages: - - *cugraph_unsuffixed - - *pytorch_conda - - pytorch-cuda==11.8 - - *tensordict - - pytorch_geometric>=2.5,<2.6 depends_on_pytorch: common: - output_types: [conda] packages: - - *pytorch_conda + - &pytorch_conda pytorch>=2.3 - torchdata - pydantic - ogb @@ -669,7 +522,7 @@ dependencies: - matrix: {cuda: "12.*"} packages: - &pytorch_pip torch>=2.3 - - *tensordict + - &tensordict tensordict>=0.1.2 - matrix: {cuda: "11.*"} packages: - *pytorch_pip @@ -693,19 +546,19 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibwholegraph-cu12==24.12.*,>=0.0.0a0 + - pylibwholegraph-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - pylibwholegraph-cu11==24.12.*,>=0.0.0a0 + - pylibwholegraph-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*pylibwholegraph_unsuffixed]} depends_on_rmm: common: - output_types: conda packages: - - &rmm_unsuffixed rmm==24.12.*,>=0.0.0a0 + - &rmm_unsuffixed rmm==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -718,19 +571,19 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - rmm-cu12==24.12.*,>=0.0.0a0 + - rmm-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - rmm-cu11==24.12.*,>=0.0.0a0 + - rmm-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: 
[*rmm_unsuffixed]} depends_on_cudf: common: - output_types: conda packages: - - &cudf_unsuffixed cudf==24.12.*,>=0.0.0a0 + - &cudf_unsuffixed cudf==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -743,19 +596,19 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf-cu12==24.12.*,>=0.0.0a0 + - cudf-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - cudf-cu11==24.12.*,>=0.0.0a0 + - cudf-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*cudf_unsuffixed]} depends_on_dask_cudf: common: - output_types: conda packages: - - &dask_cudf_unsuffixed dask-cudf==24.12.*,>=0.0.0a0 + - &dask_cudf_unsuffixed dask-cudf==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -768,19 +621,19 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - dask-cudf-cu12==24.12.*,>=0.0.0a0 + - dask-cudf-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - dask-cudf-cu11==24.12.*,>=0.0.0a0 + - dask-cudf-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*dask_cudf_unsuffixed]} depends_on_pylibraft: common: - output_types: conda packages: - - &pylibraft_unsuffixed pylibraft==24.12.*,>=0.0.0a0 + - &pylibraft_unsuffixed pylibraft==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -793,19 +646,19 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibraft-cu12==24.12.*,>=0.0.0a0 + - pylibraft-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - pylibraft-cu11==24.12.*,>=0.0.0a0 + - pylibraft-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*pylibraft_unsuffixed]} depends_on_raft_dask: common: - output_types: conda packages: - - &raft_dask_unsuffixed raft-dask==24.12.*,>=0.0.0a0 + - &raft_dask_unsuffixed 
raft-dask==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -818,19 +671,19 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - raft-dask-cu12==24.12.*,>=0.0.0a0 + - raft-dask-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - raft-dask-cu11==24.12.*,>=0.0.0a0 + - raft-dask-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*raft_dask_unsuffixed]} depends_on_pylibcugraph: common: - output_types: conda packages: - - &pylibcugraph_unsuffixed pylibcugraph==24.12.*,>=0.0.0a0 + - &pylibcugraph_unsuffixed pylibcugraph==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -843,40 +696,14 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibcugraph-cu12==24.12.*,>=0.0.0a0 + - pylibcugraph-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - pylibcugraph-cu11==24.12.*,>=0.0.0a0 + - pylibcugraph-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*pylibcugraph_unsuffixed]} - # deprecate pylibcugraphops - depends_on_pylibcugraphops: - common: - - output_types: conda - packages: - - &pylibcugraphops_unsuffixed pylibcugraphops==24.12.*,>=0.0.0a0 - - output_types: requirements - packages: - # pip recognizes the index as a global option for the requirements.txt file - - --extra-index-url=https://pypi.nvidia.com - - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple - specific: - - output_types: [requirements, pyproject] - matrices: - - matrix: - cuda: "12.*" - cuda_suffixed: "true" - packages: - - pylibcugraphops-cu12==24.12.*,>=0.0.0a0 - - matrix: - cuda: "11.*" - cuda_suffixed: "true" - packages: - - pylibcugraphops-cu11==24.12.*,>=0.0.0a0 - - {matrix: null, packages: [*pylibcugraphops_unsuffixed]} - depends_on_cupy: common: - output_types: conda diff --git 
a/docs/cugraph/source/api_docs/cugraph-ops/c_cpp/index.rst b/docs/cugraph/source/api_docs/cugraph-ops/c_cpp/index.rst deleted file mode 100644 index 39dae955ef3..00000000000 --- a/docs/cugraph/source/api_docs/cugraph-ops/c_cpp/index.rst +++ /dev/null @@ -1,2 +0,0 @@ -cugraph-ops C++ API Reference -============================= diff --git a/docs/cugraph/source/api_docs/cugraph-ops/index.rst b/docs/cugraph/source/api_docs/cugraph-ops/index.rst deleted file mode 100644 index 0f6a6c937d3..00000000000 --- a/docs/cugraph/source/api_docs/cugraph-ops/index.rst +++ /dev/null @@ -1,11 +0,0 @@ -cugraph-ops API reference -========================= - -This page provides a list of all publicly accessible modules, methods and classes through `pylibcugraphops.*` namespace. - -.. toctree:: - :maxdepth: 2 - :caption: API Documentation - - python/index - c_cpp/index diff --git a/docs/cugraph/source/api_docs/cugraph-ops/python/dimenet.rst b/docs/cugraph/source/api_docs/cugraph-ops/python/dimenet.rst deleted file mode 100644 index 6fadcc57b22..00000000000 --- a/docs/cugraph/source/api_docs/cugraph-ops/python/dimenet.rst +++ /dev/null @@ -1,24 +0,0 @@ -================= -Dimenet operators -================= - -.. currentmodule:: pylibcugraphops - -Radial Basis Functions ----------------------- -.. autosummary:: - :toctree: ../../api/ops - - dimenet.radial_basis_fwd - dimenet.radial_basis_bwd - dimenet.radial_basis_bwd_bwd - -Edge-to-Edge Aggregation -------------------------- -.. 
autosummary:: - :toctree: ../../api/ops - - dimenet.agg_edge_to_edge_fwd - dimenet.agg_edge_to_edge_bwd - dimenet.agg_edge_to_edge_bwd2_grad - dimenet.agg_edge_to_edge_bwd2_main diff --git a/docs/cugraph/source/api_docs/cugraph-ops/python/graph_types.rst b/docs/cugraph/source/api_docs/cugraph-ops/python/graph_types.rst deleted file mode 100644 index 141d40393a5..00000000000 --- a/docs/cugraph/source/api_docs/cugraph-ops/python/graph_types.rst +++ /dev/null @@ -1,34 +0,0 @@ -=========== -Graph types -=========== - -.. currentmodule:: pylibcugraphops - - -CSC Graph ------------------ -.. autosummary:: - :toctree: ../../api/ops - - make_csc - -Heterogenous CSC Graph ----------------------- -.. autosummary:: - :toctree: ../../api/ops - - make_csc_hg - -Bipartite Graph ------------------ -.. autosummary:: - :toctree: ../../api/ops - - make_bipartite_csc - -Heterogenous Bipartite Graph ----------------------------- -.. autosummary:: - :toctree: ../../api/ops - - make_bipartite_csc_hg diff --git a/docs/cugraph/source/api_docs/cugraph-ops/python/index.rst b/docs/cugraph/source/api_docs/cugraph-ops/python/index.rst deleted file mode 100644 index fb25f2fa005..00000000000 --- a/docs/cugraph/source/api_docs/cugraph-ops/python/index.rst +++ /dev/null @@ -1,13 +0,0 @@ -cugraph-ops Python API reference -================================ - -This page provides a list of all publicly accessible modules, methods and classes through `pylibcugraphops.*` namespace. - -.. toctree:: - :maxdepth: 2 - :caption: API Documentation - - graph_types - operators - dimenet - pytorch diff --git a/docs/cugraph/source/api_docs/cugraph-ops/python/operators.rst b/docs/cugraph/source/api_docs/cugraph-ops/python/operators.rst deleted file mode 100644 index 8b5efd7aa36..00000000000 --- a/docs/cugraph/source/api_docs/cugraph-ops/python/operators.rst +++ /dev/null @@ -1,109 +0,0 @@ -============================= -Operators for Message-Passing -============================= - -.. 
currentmodule:: pylibcugraphops - -Simple Neighborhood Aggregator (SAGEConv) ------------------------------------------ -.. autosummary:: - :toctree: ../../api/ops - - operators.agg_simple_n2n_fwd - operators.agg_simple_n2n_bwd - operators.agg_simple_e2n_fwd - operators.agg_simple_e2n_bwd - operators.agg_simple_n2n_e2n_fwd - operators.agg_simple_n2n_e2n_bwd - - operators.agg_concat_n2n_fwd - operators.agg_concat_n2n_bwd - operators.agg_concat_e2n_fwd - operators.agg_concat_e2n_bwd - operators.agg_concat_n2n_e2n_fwd - operators.agg_concat_n2n_e2n_bwd - - -Weighted Neighborhood Aggregation ---------------------------------- -.. autosummary:: - :toctree: ../../api/ops - - operators.agg_weighted_n2n_fwd - operators.agg_weighted_n2n_bwd - operators.agg_concat_weighted_n2n_fwd - operators.agg_concat_weighted_n2n_bwd - -Heterogenous Aggregator using Basis Decomposition (RGCNConv) ------------------------------------------------------------- -.. autosummary:: - :toctree: ../../api/ops - - operators.agg_hg_basis_n2n_post_fwd - operators.agg_hg_basis_n2n_post_bwd - -Graph Attention (GATConv/GATv2Conv) ------------------------------------ -.. 
autosummary:: - :toctree: ../../api/ops - - operators.mha_gat_n2n_fwd_bf16_fp32 - operators.mha_gat_n2n_fwd_fp16_fp32 - operators.mha_gat_n2n_fwd_fp32_fp32 - operators.mha_gat_n2n_bwd_bf16_bf16_bf16_fp32 - operators.mha_gat_n2n_bwd_bf16_bf16_fp32_fp32 - operators.mha_gat_n2n_bwd_bf16_fp32_fp32_fp32 - operators.mha_gat_n2n_bwd_fp16_fp16_fp16_fp32 - operators.mha_gat_n2n_bwd_fp16_fp16_fp32_fp32 - operators.mha_gat_n2n_bwd_fp16_fp32_fp32_fp32 - operators.mha_gat_n2n_bwd_fp32_fp32_fp32_fp32 - operators.mha_gat_n2n_efeat_fwd_bf16_fp32 - operators.mha_gat_n2n_efeat_fwd_fp16_fp32 - operators.mha_gat_n2n_efeat_fwd_fp32_fp32 - operators.mha_gat_n2n_efeat_bwd_bf16_bf16_bf16_fp32 - operators.mha_gat_n2n_efeat_bwd_bf16_bf16_fp32_fp32 - operators.mha_gat_n2n_efeat_bwd_bf16_fp32_fp32_fp32 - operators.mha_gat_n2n_efeat_bwd_fp16_fp16_fp16_fp32 - operators.mha_gat_n2n_efeat_bwd_fp16_fp16_fp32_fp32 - operators.mha_gat_n2n_efeat_bwd_fp16_fp32_fp32_fp32 - operators.mha_gat_n2n_efeat_bwd_fp32_fp32_fp32_fp32 - - operators.mha_gat_v2_n2n_fwd - operators.mha_gat_v2_n2n_bwd - operators.mha_gat_v2_n2n_efeat_fwd - operators.mha_gat_v2_n2n_efeat_bwd - -Transformer-like Graph Attention (TransformerConv) --------------------------------------------------- -.. autosummary:: - :toctree: ../../api/ops - - operators.mha_gat_v2_n2n_fwd - operators.mha_gat_v2_n2n_bwd - operators.mha_gat_v2_n2n_efeat_fwd - operators.mha_gat_v2_n2n_efeat_bwd - -Directional Message-Passing (DMPNN) ------------------------------------ -.. autosummary:: - :toctree: ../../api/ops - - operators.agg_dmpnn_e2e_fwd - operators.agg_dmpnn_e2e_bwd - -Update Edges: Concatenation or Sum of Edge and Node Features ------------------------------------------------------------- -.. 
autosummary:: - :toctree: ../../api/ops - - operators.update_efeat_e2e_concat_fwd - operators.update_efeat_e2e_concat_bwd - - operators.update_efeat_e2e_sum_fwd - operators.update_efeat_e2e_sum_bwd - - operators.update_efeat_e2e_concat_fwd - operators.update_efeat_e2e_concat_bwd - - operators.update_efeat_e2e_sum_fwd - operators.update_efeat_e2e_sum_bwd diff --git a/docs/cugraph/source/api_docs/cugraph-ops/python/pytorch.rst b/docs/cugraph/source/api_docs/cugraph-ops/python/pytorch.rst deleted file mode 100644 index d2074df15b0..00000000000 --- a/docs/cugraph/source/api_docs/cugraph-ops/python/pytorch.rst +++ /dev/null @@ -1,36 +0,0 @@ -========================== -PyTorch Autograd Wrappers -========================== - -.. currentmodule:: pylibcugraphops.pytorch - -Simple Neighborhood Aggregator (SAGEConv) ------------------------------------------ -.. autosummary:: - :toctree: ../../api/ops - - operators.agg_concat_n2n - -Graph Attention (GATConv/GATv2Conv) ------------------------------------ -.. autosummary:: - :toctree: ../../api/ops - - operators.mha_gat_n2n - operators.mha_gat_v2_n2n - -Heterogenous Aggregator using Basis Decomposition (RGCNConv) ------------------------------------------------------------- -.. autosummary:: - :toctree: ../../api/ops - - operators.agg_hg_basis_n2n_post - - -Update Edges: Concatenation or Sum of Edge and Node Features ------------------------------------------------------------- -.. autosummary:: - :toctree: ../../api/ops - - operators.update_efeat_e2e - operators.update_efeat_e2e diff --git a/docs/cugraph/source/api_docs/index.rst b/docs/cugraph/source/api_docs/index.rst index c4b90b5794d..ccb7aacfeb5 100644 --- a/docs/cugraph/source/api_docs/index.rst +++ b/docs/cugraph/source/api_docs/index.rst @@ -24,8 +24,7 @@ Graph Neural Networks API Documentation cugraph-dgl/cugraph_dgl.rst cugraph-pyg/cugraph_pyg.rst - cugraph-ops/index.rst - wholegraph/index.rst + .. 
wholegraph/index.rst Additional Graph Packages API Documentation ---------------------------------- diff --git a/docs/cugraph/source/conf.py b/docs/cugraph/source/conf.py index 66bc3137fba..6573349aaec 100644 --- a/docs/cugraph/source/conf.py +++ b/docs/cugraph/source/conf.py @@ -222,8 +222,7 @@ def setup(app): breathe_projects = { 'libcugraph': os.environ['XML_DIR_LIBCUGRAPH'], - 'libcugraphops': os.environ['XML_DIR_LIBCUGRAPHOPS'], - 'libwholegraph': os.environ['XML_DIR_LIBWHOLEGRAPH'] + # 'libwholegraph': os.environ['XML_DIR_LIBWHOLEGRAPH'] } breathe_default_project = "libcugraph" diff --git a/docs/cugraph/source/graph_support/cugraphops_support.rst b/docs/cugraph/source/graph_support/cugraphops_support.rst deleted file mode 100644 index 96b13f62a9c..00000000000 --- a/docs/cugraph/source/graph_support/cugraphops_support.rst +++ /dev/null @@ -1,10 +0,0 @@ -================== -cugraphops Support -================== - -cugraph-ops aims to be a low-level, framework agnostic library providing commonly used computational primitives for GNNs and other graph operations. - -.. 
toctree:: - :maxdepth: 3 - - https://github.com/rapidsai/cugraph/blob/branch-24.06/readme_pages/cugraph_ops.md diff --git a/docs/cugraph/source/graph_support/gnn_support.rst b/docs/cugraph/source/graph_support/gnn_support.rst index 639b657c64d..71586621608 100644 --- a/docs/cugraph/source/graph_support/gnn_support.rst +++ b/docs/cugraph/source/graph_support/gnn_support.rst @@ -8,5 +8,4 @@ Graph Neural Network Support PyG_support.md DGL_support.md - cugraphops_support.rst wholegraph_support.md diff --git a/docs/cugraph/source/nx_cugraph/how-it-works.md b/docs/cugraph/source/nx_cugraph/how-it-works.md index 88788f3c0cc..0061b0445de 100644 --- a/docs/cugraph/source/nx_cugraph/how-it-works.md +++ b/docs/cugraph/source/nx_cugraph/how-it-works.md @@ -10,7 +10,7 @@ While NetworkX is a pure-Python implementation, backends may be written to use o ## Enabling nx-cugraph -It is recommended to use `networkx>=3.4` for optimal zero code change performance, but `nx-cugraph` will also work with `networkx 3.0+`. +It is recommended to use `networkx>=3.4` for optimal zero code change performance, but `nx-cugraph` will also work with `networkx 3.2+`. NetworkX will use `nx-cugraph` as the backend if any of the following are used: diff --git a/docs/cugraph/source/nx_cugraph/index.rst b/docs/cugraph/source/nx_cugraph/index.rst index 50565c805a9..0eb8907b397 100644 --- a/docs/cugraph/source/nx_cugraph/index.rst +++ b/docs/cugraph/source/nx_cugraph/index.rst @@ -49,7 +49,7 @@ Users can have GPU-based, large-scale performance **without** changing their fam +--------------------------------------------------------------------------------------------------------+ | **Run the same code on CPU or GPU** | | | -| Nothing changes, not even your `import` statements, when going from CPU to GPU. | +| Nothing changes, not even your ``import`` statements, when going from CPU to GPU. 
| +--------------------------------------------------------------------------------------------------------+ diff --git a/docs/cugraph/source/nx_cugraph/installation.md b/docs/cugraph/source/nx_cugraph/installation.md index a816801d001..9675306c47b 100644 --- a/docs/cugraph/source/nx_cugraph/installation.md +++ b/docs/cugraph/source/nx_cugraph/installation.md @@ -10,7 +10,7 @@ This guide describes how to install ``nx-cugraph`` and use it in your workflows. - **Volta architecture or later NVIDIA GPU, with [compute capability](https://developer.nvidia.com/cuda-gpus) 7.0+** - **[CUDA](https://docs.nvidia.com/cuda/index.html) 11.2, 11.4, 11.5, 11.8, 12.0, 12.2, or 12.5** - **Python >= 3.10** - - **[NetworkX](https://networkx.org/documentation/stable/install.html#) >= 3.0 (version 3.4 or higher recommended)** + - **[NetworkX](https://networkx.org/documentation/stable/install.html#) >= 3.2 (version 3.4 or higher recommended)** More details about system requirements can be found in the [RAPIDS System Requirements Documentation](https://docs.rapids.ai/install#system-req). 
diff --git a/docs/cugraph/source/nx_cugraph/supported-algorithms.rst b/docs/cugraph/source/nx_cugraph/supported-algorithms.rst index c53e376a463..f672c0da41b 100644 --- a/docs/cugraph/source/nx_cugraph/supported-algorithms.rst +++ b/docs/cugraph/source/nx_cugraph/supported-algorithms.rst @@ -52,6 +52,7 @@ Algorithms +--------------------------+ | **Bipartite** | +==========================+ +| betweenness_centrality | | complete_bipartite_graph | +--------------------------+ @@ -186,7 +187,7 @@ Algorithms +---------------------------------------+ +---------------------------+ -| **Traversal** | +| **Traversal** | +===========================+ | bfs_edges | +---------------------------+ diff --git a/docs/cugraph/source/tutorials/cugraph_blogs.rst b/docs/cugraph/source/tutorials/cugraph_blogs.rst index 3665f425e3f..57fa011ab59 100644 --- a/docs/cugraph/source/tutorials/cugraph_blogs.rst +++ b/docs/cugraph/source/tutorials/cugraph_blogs.rst @@ -65,9 +65,9 @@ Academic Papers * Alex Fender, Brad Rees, Joe Eaton (2022) `Massive Graph Analytics `_ Bader, D. (Editor) CRC Press - * S Kang, A. Fender, J. Eaton, B. Rees:`Computing PageRank Scores of Web Crawl Data Using DGX A100 Clusters`. In IEEE HPEC, Sep. 2020 + * S Kang, A. Fender, J. Eaton, B. Rees. `Computing PageRank Scores of Web Crawl Data Using DGX A100 Clusters `_. In IEEE HPEC, Sep. 2020 - * Hricik, T., Bader, D., & Green, O. (2020, September). `Using RAPIDS AI to accelerate graph data science workflows`. In 2020 IEEE High Performance Extreme Computing Conference (HPEC) (pp. 1-4). IEEE. + * Hricik, T., Bader, D., & Green, O. (2020, September). `Using RAPIDS AI to accelerate graph data science workflows `_. In 2020 IEEE High Performance Extreme Computing Conference (HPEC) (pp. 1-4). IEEE. * Richardson, B., Rees, B., Drabas, T., Oldridge, E., Bader, D. A., & Allen, R. (2020, August). Accelerating and Expanding End-to-End Data Science Workflows with DL/ML Interoperability Using RAPIDS. 
In Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining (pp. 3503-3504). diff --git a/docs/cugraph/source/wholegraph/installation/container.md b/docs/cugraph/source/wholegraph/installation/container.md index 6aac53cf88f..4068ead27b2 100644 --- a/docs/cugraph/source/wholegraph/installation/container.md +++ b/docs/cugraph/source/wholegraph/installation/container.md @@ -21,7 +21,7 @@ RUN pip3 install -U py RUN pip3 install Cython setuputils3 scikit-build nanobind pytest-forked pytest ``` -To run GNN applications, you may also need cuGraphOps, DGL and/or PyG libraries to run the GNN layers. +To run GNN applications, you may also need DGL and/or PyG libraries to run the GNN layers. You may refer to [DGL](https://www.dgl.ai/pages/start.html) or [PyG](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html) For example, to install DGL, you may need to add: diff --git a/img/cugraphops_context.png b/img/cugraphops_context.png deleted file mode 100644 index 8db157d2f09..00000000000 Binary files a/img/cugraphops_context.png and /dev/null differ diff --git a/notebooks/README.md b/notebooks/README.md index f0d0a25b9dd..96f8ed5ce10 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -66,13 +66,13 @@ The easiest way to run the notebooks is to get the latest [rapidsai/notebooks](h For example, get the latest (as of writing the document) nightly image (`a` after the version number indicates that an image is nightly) with cuda 12.0 using ```sh -docker pull rapidsai/notebooks:24.12a-cuda12.0-py3.10 +docker pull rapidsai/notebooks:25.02a-cuda12.0-py3.10 ``` And, then run a container based on the image using ```sh -docker run --rm -it --pull always --gpus all --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p 8888:8888 rapidsai/notebooks:24.12a-cuda12.0-py3.10 +docker run --rm -it --pull always --gpus all --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p 8888:8888 
rapidsai/notebooks:25.02a-cuda12.0-py3.10 ``` You are all set. Run and edit cugraph notebooks from a browser at url http://127.0.0.1:8888/lab/tree/cugraph/cugraph_benchmarks @@ -88,8 +88,8 @@ ssh -L 127.0.0.1:8888:127.0.0.1:8888 [USER_NAME@][REMOTE_HOST_NAME or REMOTE_HO and then run the container in your remote machine. ```sh -docker pull rapidsai/notebooks:24.12a-cuda12.0-py3.10 -docker run --rm -it --pull always --gpus all --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p 8888:8888 rapidsai/notebooks:24.12a-cuda12.0-py3.10 +docker pull rapidsai/notebooks:25.02a-cuda12.0-py3.10 +docker run --rm -it --pull always --gpus all --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p 8888:8888 rapidsai/notebooks:25.02a-cuda12.0-py3.10 ``` You can run and edit cugraph notebooks at url http://127.0.0.1:8888/lab/tree/cugraph/cugraph_benchmarks as if they are running locally. diff --git a/notebooks/demo/centrality_patentsview.ipynb b/notebooks/demo/centrality_patentsview.ipynb new file mode 100644 index 00000000000..be201fc392a --- /dev/null +++ b/notebooks/demo/centrality_patentsview.ipynb @@ -0,0 +1,1852 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "xkg10FrNThrK" + }, + "source": [ + "# Centrality Measures with cuGraph and US Patent Citations.\n", + "This notebook will demonstrate RAPIDS cuGraph to do centrality calculations on US Patent data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mefjUEAnZ4pq" + }, + "source": [ + "# Downloading the data\n", + "\n", + "Citation: U.S. Patent and Trademark Office. “Data Download Tables.” PatentsView. Accessed [10/06/2024]. https://patentsview.org/download/data-download-tables.\n", + "\n", + " Both files are used under the Creative Commons license https://creativecommons.org/licenses/by/4.0/\n", + "\n", + "\n", + "The first file, g_patent.tsv.zip, contains summary data for each patent such as id, title and the location of the original patent document. 
The table description is available on the [PatentsView site](https://patentsview.org/download/data-download-dictionary).\n", + "\n", + "The second file, g_us_patent_citation.tsv.zip, contains a record for every citation between US patents. The description of this table is also available on the [PatentsView site](https://patentsview.org/download/data-download-dictionary)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Removing the comment character \"#\" and running the below lines will download and expand the data into the directory the notebook expects it to be in." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "lyYF0LbtFwjh" + }, + "outputs": [], + "source": [ + "#!wget https://s3.amazonaws.com/data.patentsview.org/download/g_patent.tsv.zip\n", + "#!unzip ./g_patent.tsv.zip\n", + "#!wget https://s3.amazonaws.com/data.patentsview.org/download/g_us_patent_citation.tsv.zip\n", + "#!unzip ./g_us_patent_citation.tsv.zip" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# We will create the dataframes using cudf and create the graphs with cuGraph\n", + "import cudf\n", + "import cugraph" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This method takes a list of patents and goes out one hop. It returns the new list of all the patents which can\n", + "be used in the same function to go out an additional hop." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def next_hop(seeds):\n", + " seed_df = seeds.to_frame('seed')\n", + " source_df = citation_df.merge(seed_df, left_on='source', right_on='seed', how='inner')\n", + " target_df = (citation_df.merge(seed_df, left_on='target', right_on='seed', how='inner'))\n", + " links_df = cudf.concat([source_df,target_df])\n", + " links_df = links_df.drop('seed', axis=1)\n", + " new_seed_set = links_df.melt()\n", + " new_seed_set = new_seed_set.drop('variable',axis=1)\n", + " new_seed_set = new_seed_set['value'].drop_duplicates()\n", + " return links_df.drop_duplicates(), new_seed_set" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function displays the top results of all the centrality algorithms" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Print function\n", + "def print_centrality(k, dc, bc, kr, pr, ev):\n", + "\n", + " dc_top = dc.sort_values(by='degree_centrality', ascending=False).head(k).to_pandas()\n", + " bc_top = bc.sort_values(by='betweenness_centrality', ascending=False).head(k).to_pandas()\n", + " kr_top = kr.sort_values(by='katz_centrality', ascending=False).head(k).to_pandas()\n", + " pr_top = pr.sort_values(by='pagerank', ascending=False).head(k).to_pandas()\n", + " ev_top = ev.sort_values(by='eigenvector_centrality', ascending=False).head(k).to_pandas()\n", + " \n", + " df1_styler = dc_top.style.set_table_attributes(\"style='display:inline'\").set_caption('Degree').hide(axis='index')\n", + " df2_styler = bc_top.style.set_table_attributes(\"style='display:inline'\").set_caption('Betweenness').hide(axis='index')\n", + " df3_styler = kr_top.style.set_table_attributes(\"style='display:inline'\").set_caption('Katz').hide(axis='index')\n", + " df4_styler = 
pr_top.style.set_table_attributes(\"style='display:inline'\").set_caption('PageRank').hide(axis='index')\n", + " df5_styler = ev_top.style.set_table_attributes(\"style='display:inline'\").set_caption('EigenVector').hide(axis='index')\n", + "\n", + " display_html(df1_styler._repr_html_()+df2_styler._repr_html_()+df3_styler._repr_html_()+df4_styler._repr_html_()+df5_styler._repr_html_(), raw=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After running the centrality algorithms, this function displays the supplied graph. The dataframe from any of the centrality calculations is also supplied along with the number of the highest ranking nodes we want to emphasize with labels and scale with size based on the centrality value." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def draw_centrality_graph(graph_df, number_to_label, cent_df):\n", + " import networkx as nx\n", + " import matplotlib.pyplot as plt\n", + "\n", + " top_ones = cent_df.sort_values(by=cent_df.columns[0], ascending=False).head(number_to_label)[cent_df.columns[1]].to_pandas()\n", + " print(top_ones)\n", + " sizes = []\n", + " nodes = []\n", + "\n", + " plt.rcParams['figure.figsize'] = [12, 8]\n", + " nx_g = nx.from_pandas_edgelist(graph_df.to_pandas())\n", + "\n", + " for node in nx_g.nodes():\n", + " cent_value =cent_df.loc[cent_df['vertex'] == node, cent_df.columns[0]].values[0]\n", + " nodes.append(node)\n", + " sizes.append((cent_value[0]*3000+10).item())\n", + "\n", + " pos = nx.spring_layout(nx_g)\n", + " labels = {node: node for node in top_ones}\n", + " nx.draw(nx_g, pos=pos, nodelist=nodes,font_color='red' ,node_size=sizes,node_color='grey', node_shape='o', linewidths=2, labels=labels)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function executes the five centrality algorithms." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Compute Centrality\n", + "# the centrality calls are very straightforward with the graph being the primary argument\n", + "# we are using the default argument values for all centrality functions except where noted.\n", + "\n", + "def compute_centrality(_graph, highest_degree=2000) :\n", + "\n", + " # Compute Degree Centrality\n", + " _d = cugraph.degree_centrality(_graph)\n", + " print(\"Degree Centrality done\")\n", + "\n", + " # Compute the Betweenness Centrality\n", + " # The k value is needed in large graphs as by default, it will calculate the\n", + " # betweenness centrality score based on every possible pair in the graph.\n", + " # Limiting the number of vertices involved in the shortest path search leads to a better runtime.\n", + " # In this case we limit it to 100 samples at most.\n", + " _b = cugraph.betweenness_centrality(_graph,k=100)\n", + " print(\"Between Centrality done\")\n", + "\n", + " # Compute Katz Centrality\n", + " # alpha is set to one divided by highest_degree, which is meant to be the maximum degree in the graph. This will enable convergence\n", + " # max_iter will determine how many iterations will be run before giving up if convergence doesn't occur\n", + " # tol is the tolerance which determines the maximum difference that indicates convergence \n", + " _k = cugraph.katz_centrality(_graph, max_iter=1000, tol=1.0e-2,alpha=1/highest_degree)\n", + " print(\"Katz Centrality done\")\n", + "\n", + " # Compute PageRank Centrality\n", + " _p = cugraph.pagerank(_graph)\n", + " print(\"Pagerank done\")\n", + "\n", + " # Compute EigenVector Centrality\n", + " _e = cugraph.eigenvector_centrality(_graph, max_iter=1000, tol=1.0e-3)\n", + " print(\"Eigenvector Centrality done\")\n", + "\n", + " return _d, _b, _k, _p, _e" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we load the entire US patent citation edge list into a cuDF dataframe." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# load the citation graph\n", + "import cudf\n", + "citation_df = cudf.read_csv(\"../data/g_us_patent_citation.tsv\",\n", + " sep='\\t',\n", + " header=0,\n", + " usecols=[0,2],\n", + " names=[\"source\", \"target\"],\n", + " dtype={\"source\":str,\"target\":str},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sourcetarget
0100000005093563
1100000005751830
2100000017804268
3100000019022767
4100000019090016
\n", + "
" + ], + "text/plain": [ + " source target\n", + "0 10000000 5093563\n", + "1 10000000 5751830\n", + "2 10000001 7804268\n", + "3 10000001 9022767\n", + "4 10000001 9090016" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Display the first five edges in the dataframe.\n", + "citation_df.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting dataframe has over 142 million edges" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "142183260" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(citation_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Choose a patent of interest. In this case we will choose a patent representing an advance in visualizing machine learning models. Adding additional patents to the seed list can be done here." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "poi = [\"10810491\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Go out one hop from the patent(s) supplied." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "seed_series=cudf.Series(poi)\n", + "first_hop_df, first_set = next_hop(seed_series)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show how many patents cite or are cited by the starting one(s)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "32" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(first_hop_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this case we will just use the second hop edge/patent list. However, for demonstration purposes, we show the next_hop function can go out as many hops as necessary to build a relevant graph as needed for different data sets. The code below goes out four levels of separation." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "second_hop_df, second_hop_seeds = next_hop(first_set)\n", + "third_hop_df, third_hop_seeds = next_hop(second_hop_seeds)\n", + "fourth_hop_df, fourth_hop_seeds = next_hop(third_hop_seeds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These are node (patent) counts at each hop." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(33, 402, 29916, 659475)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(first_set), len(second_hop_seeds),len(third_hop_seeds),len(fourth_hop_seeds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These are edge (citation) counts at each hop." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(32, 465, 122774, 5177479)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(first_hop_df), len(second_hop_df),len(third_hop_df),len(fourth_hop_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The dataframe we will use contains 2 hops." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sourcetarget
0108104918234228
1108104916038337
2110864715222210
3110864716049793
4110864717475072
.........
213RE463106038337
214RE480566038337
215RE479086038337
216RE493346038337
217RE493876038337
\n", + "

465 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " source target\n", + "0 10810491 8234228\n", + "1 10810491 6038337\n", + "2 11086471 5222210\n", + "3 11086471 6049793\n", + "4 11086471 7475072\n", + ".. ... ...\n", + "213 RE46310 6038337\n", + "214 RE48056 6038337\n", + "215 RE47908 6038337\n", + "216 RE49334 6038337\n", + "217 RE49387 6038337\n", + "\n", + "[465 rows x 2 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "second_hop_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we will build a directed Graph in cuGraph from the second hop dataframe created above" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 92 ms, sys: 24.9 ms, total: 117 ms\n", + "Wall time: 114 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "G = cugraph.from_cudf_edgelist(second_hop_df,create_using=cugraph.Graph(directed=True),source='source', destination='target')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We use the compute_centrality function above to calculate and note the execution time" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Degree Centrality done\n", + "Between Centrality done\n", + "Katz Centrality done\n", + "Pagerank done\n", + "Eigenvector Centrality done\n", + "CPU times: user 115 ms, sys: 24.6 ms, total: 139 ms\n", + "Wall time: 137 ms\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/dacosta/miniforge3/lib/python3.12/site-packages/cugraph/centrality/katz_centrality.py:121: UserWarning: Katz centrality expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n", + " warnings.warn(warning_msg, UserWarning)\n", + 
"/home/dacosta/miniforge3/lib/python3.12/site-packages/cugraph/link_analysis/pagerank.py:227: UserWarning: Pagerank expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n", + " warnings.warn(warning_msg, UserWarning)\n", + "/home/dacosta/miniforge3/lib/python3.12/site-packages/cugraph/centrality/eigenvector_centrality.py:88: UserWarning: Eigenvector centrality expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n", + " warnings.warn(warning_msg, UserWarning)\n" + ] + } + ], + "source": [ + "%%time\n", + "dc, bc, kc, pr, ev = compute_centrality(G)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We import the formatting package and print out the top 10 patents for each centrality measure" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Degree
degree_centralityvertex
0.3940156038337
0.15212011412041
0.07980110810491
0.06982511977958
0.0548638234228
0.05486311769180
0.05486311682052
0.04987511900046
0.04738211921730
0.03990011960575
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Betweenness
betweenness_centralityvertex
0.0039856038337
0.00105310810491
0.00045111182695
0.0002768234228
0.0000008910022
0.00000010884769
0.0000008311967
0.00000010318474
0.00000010621019
0.0000006760808
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Katz
katz_centralityvertex
0.0537106038337
0.05059410810491
0.0503708234228
0.04999611182695
0.0499219652109
0.04992110706450
0.0499219092802
0.04992110929916
0.04992110402061
0.0499217703036
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PageRank
pagerankvertex
0.1848896038337
0.0537555450504
0.0537555408585
0.0537555497430
0.0288058234228
0.0258547398259
0.01524910810491
0.00835411182695
0.00408611304074
0.00396910379502
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EigenVector
eigenvector_centralityvertex
0.5358825450504
0.5358825408585
0.5358825497430
0.2806926038337
0.1873557398259
0.1389018234228
0.06137510810491
0.01751110379502
0.01751110432689
0.0098619743237
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from IPython.display import display_html\n", + "\n", + "print_centrality(10, dc, bc, kc, pr, ev)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now call the function that draws the graph with the specified number of the most central nodes labeled.\n", + "The final parameter, pr in this case, for PageRank sends in the particular algorithm results to graph." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "306 6038337\n", + "7 5450504\n", + "284 5408585\n", + "355 5497430\n", + "307 8234228\n", + "21 7398259\n", + "320 10810491\n", + "308 11182695\n", + "312 11304074\n", + "256 10379502\n", + "257 10432689\n", + "339 9743237\n", + "Name: vertex, dtype: object\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABMQAAAMzCAYAAABJAM7VAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1SUV9s18D3D0KsI0qQJohQRO2IFu8aKLbHHhi2aWJOY+mgSNbYENfbeKzGKFSsogiAKiKBSpfdeZ74//JhXwgygMWJk/9ZiPXKf+z5zZvB9NdvrXEcgkUgkICIiIiIiIiIiaiCE9b0AIiIiIiIiIiKid4mBGBERERERERERNSgMxIiIiIiIiIiIqEFhIEZERERERERERA0KAzEiIiIiIiIiImpQGIgREREREREREVGDwkCMiIiIiIiIiIgaFAZiRERERERERETUoDAQIyIiIiIiIiKiBoWBGBERERERERERNSgMxIiIiIiIiIiIqEFhIEZERERERERERA0KAzEiIiIiIiIiImpQGIgREREREREREVGDwkCMiIiIiIiIiIgaFAZiRERERERERETUoDAQIyIiIiIiIiKiBoWBGBERERERERERNSgMxIiIiIiIiIiIqEFhIEZERERERERERA0KAzEiIiIiIiIiImpQGIgREREREREREVGDwkCMiIiIiIiIiIgaFAZiRERERERERETUoDAQIyIiIiIiIiKiBoWBGBERERERERERNSgMxIiIiIiIiIiIqEFhIEZERERERERERA0KAzEiIiIiIiIiImpQGIgREREREREREVGDwkCMiIiIiIiIiIgaFAZiRERERERERETUoDAQIyIiIiIiIiKiBoWBGBERERERERERNSgMxIiIiIiIiIiIqEFhIEZERERERERERA0KAzEiIiIiIiIiImpQGIgREREREREREVGDwkCMiIiIiIiIiIgaFAZiRERERERERETUoDAQIyIiIiIiIiKiBoWBGBERERERERERNSgMxIiIiIiIiIiIqEFhIEZERERERERERA0KAzEiIiIiIiIiImpQG
IgREREREREREVGDwkCMiIiIiIiIiIgaFAZiRERERERERETUoDAQIyIiIiIiIiKiBoWBGBERERERERERNSgMxIiIiIiIiIiIqEFhIEZERERERERERA0KAzEiIiIiIiIiImpQGIgREREREREREVGDwkCMiIiIiIiIiIgaFAZiRERERERERETUoDAQIyIiIiIiIiKiBoWBGBERERERERERNSgMxIiIiIiIiIiIqEFhIEZERERERERERA0KAzEiIiIiIiIiImpQGIgREREREREREVGDwkCMiIiIiIiIiIgaFAZiRERERERERETUoDAQIyIiIiIiIiKiBoWBGBERERERERERNSgMxIiIiIiIiIiIqEFhIEZERERERERERA0KAzEiIiIiIiIiImpQGIgREREREREREVGDwkCMiIiIiIiIiIgaFAZiRERERERERETUoDAQIyIiIiIiIiKiBkVU3wsgIiIiImooxGIxIiMjERsbC5FIBFtbWxgbG9f3soiIiBocgUQikdT3IoiIiIiI3kfl5eUIDg5GREQEKioqYGlpiQ4dOkBNTe215yoqKsKhQ4eQkJBQ5XqnTp3Qr18/CASCt7VsIiIiqgUDMSIiIiIiGUpLS7F///5qAZa2tjamTJkCbW3t15rvzJkzCAkJkTnm7u4OBweHN14rERERvR5umSQiIiKiD1pFRQXy8/OhqqoKJSWlOj93586damEYAOTk5ODSpUsYNWpUnecqLS3Fo0eP5I4HBQXVGIjl5ubizp07iIuLg6KiImxtbdGuXTuIRPzrPBER0Zvgn6BERERE9EESi8W4efMm/P39UVxcDKFQCHt7e/Tv379OWx7DwsLkjj1+/BhlZWVQVFSs01oKCgogFovljufm5sodS01NxZ49e1BUVCS9Fhsbi/DwcEyYMIGhGBER0Rvgn55ERERE9EG6cOECAgICpN+LxWI8evQIqampmDZtWq1BUnFxsdwxiUTyWoGYpqYmlJSUUFpaKnNcT09P7rPe3t5VwrBKcXFxCAgIQOfOnWtc5/379xEYGIjs7Gw0btwYHTt2hKOjI3uWERFRgyas7wUQEREREb1tOTk5CAwMlDmWkpKC8PDwWudo2rSp3DFdXV2oqqrWeT0ikQjt27eXO96pUyeZ1wsKChATEyP3uZqq2ADgr7/+wrlz55CSkoKSkhIkJibizJkzuHbtWp3WTURE9KFiIEZEREREH5z4+HjUdHZUTSFTpa5du0IolP3X5e7du8utsEpOToa3tzeOHz+O69evS7dDurm5wdHRscq9IpEIgwYNgqWlpcy55FWU1WU8KSkJQUFBMsdu3bqF7OzsGucmIiL6kHHLJBERERG9lyQSCSIjI6X9uszNzeHk5FSnxvi1bYesS98tY2NjfPLJJzh//jwyMzOlz/Xv3x+tW7eW+czdu3dx8eLFKtfu3LmDjz/+GBYWFhg+fDiePXuGEydO4I8//oCNjU2N/cy0tbWhqamJvLw8meOmpqZyn42MjKzx/UVFRaFDhw413lNcXIz09HSoq6ujUaNGNd5LRET0X8JAjIiIiIjeO2KxGCdPnqyytTE8PBz+/v6YNGkStLS0anze0tISysrKKCkpkTluZ2dXp3VYWVlh7ty5SE5OhrOzM7744gu0a9dO5r1paWnVwjDgZRXXqVOnMH/+fCgoKOD58+coLCyEk5NTra8vFArRo0cP/PXXX9XGlJSU4OLiIvfZmpr41zZeXl6OS5cuISgoCBUVFQAAc3NzDBkyBLq6urWum4iI6H3HLZNERERE9N4JCgqS2ecrMzMTFy5cqPV5ZWVlDBgwQOaYk5MTzM3N67wWgUAAIyMjaGlpISoqSu59Dx8+lDuWl5eH58+fA3hZmdW8efM6v367du3g7OxcpUrMyMgIEydOROPGjeU+16xZsxrnrWn83LlzCAgIkIZhwMuTLfft2yc3ZCQiIvovYYUYEREREb13agqXIiIiUFxcDBUVlRrnaN26NXR0d
HD37l2kpKQgMTERQUFB+Prrr+t0wqJEIkFiYiLS0tKgoaEBGxubGrchFhQU1Dhf5XhUVBTGjh1b6+u/KjAwEFu3bkVoaCi0tbWho6NT6zNmZmZo3ry5zBCvTZs20NfXl/lcTk4OHjx4IHcsJCQEHTt2fJ3lExERvXcYiBERERHRe6emcEkikdQpEANebvOrrAbz9/fHqlWrcOnSJQwcOLDG5/Lz83H8+HHExcVJr7Vq1Urm1sVKhoaGNc5pZGSE4uJixMXFvVaFmEQiwb59+zB8+PAqlW1isRj5+flQUVGR2VdNIBAgPj4et2/fhpubG0pLS6GpqYkOHTqgS5cucl/vxYsXNa4nISGBgRgREf3nMRAjIiIioveOkZGRtJH936mpqdXaQwx4uU0xJiYGQqEQlpaW6NixI9q0aYMtW7bUGIhJJJJqYRjwMmDq1asXMjMzZfbRcnR0xK1bt5Cfn19tzMrKCgYGBggLC4NEIoGNjU2t66907949REZGYtOmTdL1+fv7w9fXF/n5+RAIBLC1tUX//v2hqakpfS4uLg4//vgjZs6ciWXLlqGsrAyKioq1VsfVFjQqKyvXee1ERETvKwZiRERERPTe6dy5M8LDwyGRSGSOCYXyW+FKJBJcuXIFd+7ckT4vEonQq1cvzJ49GzNmzEBMTAwsLCxkPp+YmFgtDKukpqaGGzduYPjw4dXGVFRUMGHCBJw8eRKpqanS6zY2Nhg2bBgASLcvmpmZ4dKlS3j06BGKi4thYmKCbt26wcrKqtq8+/btQ9OmTeHq6goAuHnzJq5fv17l/YaHhyM5ORkzZ86UVovNnz8fOjo6+OGHHyAQCOp0OifwsqpOXV1dbpWeg4NDneYhIiJ6n7GpPhERERG9d0xMTDBq1Cioq6tXuX7r1i3k5OTU+Ky/vz/8/PyqhGnl5eW4ePEi2rdvDy0tLWzbtk3u82lpaTXOHx8fL3esSZMm8PDwgIKCAs6fP4+5c+fi448/hqqqKgAgMjISOjo6uHjxIu7cuYP8/HyUl5cjNjYWBw4cQGhoaJX5SkpKcOTIEYwfPx4KCgooLi7G7du3Zb52ZmYmgoODAQB//fUXzpw5gw0bNlSpGqurkJAQlJeXV7veqVOn1zqQgIiI6H3FCjEiIiIiei/Z2trCxsYGcXFxKC8vh7GxMe7du4cpU6bg4cOHMnt2SSQS3L17V+6cwcHBmDRpEnbs2IHvvvtO5va/2gKk3NzcGscFAgGKi4uRkpJS7RTIqKgouLm5ISkpSeazFy9ehK2tLRQUFAAA58+fR2ZmJiZMmADgZX8vWUFVpdjYWLRq1Qrz5s1D3759MXLkyBrX+ncSiQTz5s3D4cOHceTIEaipqSElJQXq6upwcnKS9j4rLy9Hbm4u1NTU6tTLjYiI6H3DQIyIiIiI3lvZ2dkoLS2FlpYW1NTUsHv3bjg6OmLSpEnw9vautnWyrKysxgqy1NRUeHh44LfffsOpU6fw8ccfV7vH0tISOjo6yM7OrjYmFouRkJBQ67oLCwuhpqZW7XpUVBQ6dOgg97n8/HwkJyfDxMQEALB37160b98ednZ2AF5u/ayJgoICfvrpJyQlJeHy5ctV+oVJJBKEhYUhMDAQ2dnZ0NXVRceOHdGyZUvpPT///DO2bNmCHTt2yAzTxGIxbty4AX9/fxQXF0MgEKBly5YYMGBAjUFieXk5AgICEBYWhpKSEpiamsLFxQV6eno1vh8iIqJ/CwMxIiIiIvrXlJeXIzQ0FLGxsVBUVIStrS0sLCxqbexeWFiI06dP4+nTp9JrxsbGcHd3x969e9G/f39s3LgRn3/+eZXnRCIRlJWVUVJSInNeTU1N2NrawtXVFZs3b5YZiAmFQowaNQoHDx5EYWFhlespKSkIDw+v9X3XFIj16NGjxmcrt3qmp6fj3LlzWLdunXSsadOm0NDQkNm4HwC0tLSwevVqfPXVV7C2tq4yduXKFfj5+Um/z8nJQXR0NFxdXdG9e
3fs3bsXX3/9Nb7//ntMnTpV5vyXLl2Cv79/lbU+fvwYaWlpmDFjBhQVFas9U1FRgQMHDiA2NlZ6LT09HaGhoRg/fjzMzMxq/DyIiIj+DewhRkRERET/ivz8fGzfvh1eXl548OABAgICsG/fPvz5558ym+VXkkgkOHr0aJUwDHjZ7H7//v1wc3PDF198gaVLl0p7ZlUSCoVwcnKSO3ebNm0AALNmzcLt27fx6NEjmfcZGxtj7ty56N+/Pxo3bozr16/D3d0dxsbGiIyMrPW9ywrE8vPzkZiYWGNVlLq6unQr6JEjRwAAY8eOlY4rKCjIPSGzefPm+OWXX2BmZoZly5ZVGUtLS6sShr3q+vXr+PPPPzFt2jRMmzYN3377rcz78vLycO/ePZlj6enpcj/LoKCgKmFYpbKyMpw7d67G3wtERET/FgZiRERERPSvuHDhQpXTFis9ePAADx8+lPtcQkKC3FMes7OzER4ejp9++gn29vb45JNPqp2G6ObmJrPqyMDAAJ06dQIADBs2DIaGhtiyZYvcdaiqqqJTp04YMWIErl+/jsjISNjY2CA9PR2ZmZlynwNeBmKVjfQrVQZ8bdq0gb6+vsznevfuLd0WuW/fPgwaNKjavVZWVvjrr7+Qk5MDHR0dGBsbo3///pBIJPDx8YGnp2e1vl4RERFy1yqRSLBmzRr069cPW7ZskVu9l5CQUGN4JSv0AoDHjx/LfSY1NbXWz5KIiOjfwECMiIiIiN664uLiGrcWPnjwQO5YcnJyjXMnJSVBWVkZhw8fRmxsLL744osq40pKSpg0aRJGjx6N9u3bo0OHDrhx4wZOnjwp7TmmqKiI6dOnY//+/cjLy0NSUhJ8fHxw8eJFPHnyBGKxWDqfsbExjIyMEBgYCBsbGwAvtz7WRFaFWOUztra2mDx5MoyMjFBcXAwAMDIywujRo6XVbY8fP0ZAQAAmTpxYbe49e/bg/v37mDp1KubPn4/p06ejZcuWWLhwIUaOHIn+/ftXe6aioqLG9ZqYmODo0aM19iiTtR2yLuM1HQIAvKwUIyIietfYQ4yIiIiI3rqioqIaq4ny8vLkjsnqvSVrvGXLlti4cSNmzJiBfv36YcSIEdJ7hEIhbG1tYWtrCwAoKSmRVnr17NkTADB9+nSsXLkSf/zxR5VeYXfv3kXTpk0xbtw4aaVVu3btEBgYKN2KGBkZKa02+zuxWAxVVVU0bdoUN27cgJ2dHfT19REZGYlGjRpJT55MT0/H5s2bkZ6eXi1M2r9/Pxo1aoRBgwZVuV5aWoqVK1di9OjRsLe3l15fvnw5CgoKsGHDBplrsrCwwI0bN+R+pt9++y3U1dXljgOAubk5FBQU5IZrlZ/139XUbF9NTY2N9YmIqF6wQoyIiIiI3jotLa1q2/ZeZWBgIHfMxsam2nbDSgKBAK1atZJ+P23aNLi7u2PatGmIj4+XO+ewYcPQtm1bLF++XBrUmZqaYuLEiVXCsEoJCQm4ePGi9Pv27dsjMDAQ6urqMDExkdtHrLCwELt27YKjoyN0dXVx/fp1bN68GdevX0dUVJS0wgwA/Pz80Llz52phWEVFBfbv34+xY8dCWVm5ytiuXbtQVlYGDw8P6brv37+PzZs344cffpCeTvl35ubmcn8eZmZm0lMs5ZFIJNi4cSMOHz4sM+hs3bo1mjVrVu2ZHTt2YO7cudJKuL/r3r17rSdnEhER/RsYiBERERHRW6egoICOHTvKHBOLxYiJiZFbQaaoqIgRI0ZUC0okEgnu3r1bZTujQCDAtm3boK6ujgkTJsitXhIIBFixYgV8fX2rBF2Ojo5y38OjR49QWloK4GUglpGRgdjYWNjY2MgNxM6fP48XL15Uu37jxg1kZWWhefPm0s/gzp07cHFxqXbv9evXkZCQUG27ZHR0NCIiIjBz5kzcuHEDa9euxfnz5zF79mw4ODjgs88+k/te/P398eOPPyIvL08awJWUlKBZs2aYMGGC3OeAl1se5
86di8WLF2P48OGYNm0aHBwcoKenBwsLCwwdOhRDhw6t0nssLS0Nw4cPx/Tp0zF06FB4eHhUCcw0NTUxaNAgub9HiIiI/m385xgiIiIi+lf06NEDhYWFuH//vjT8UlZWRl5eHpYsWYLnz5/j999/l1khZG1tjTlz5uD+/fvIyMiAlpYW9PT0sGnTJgwfPhyXL1+WVk/p6uriwIEDcHV1xapVq/DVV1/JXE///v3RpUsXLF++HP369YNAIKixOqmiogKFhYVQUlJCu3btALysxrKxscHdu3er3V9YWFhj3zQdHR1YWVkBeLnlMjMzU2Ygtm/fPjRv3rzKlszc3FzpNspKYrEYAQEBaNSoEb755hu57+XFixcYPnw4nJycsGLFCixYsABHjhzBwYMHq23JrCSRSJCUlISUlBT89NNPOH36NLZt24bp06cDAJo2bSr3fXp7e2PKlCmoqKjAmTNnMHToUADAhAkTUFhYiJKSEmhra0v7uREREdUHBmJERERE9EYqKioQFxeH4uJimJiYQEtLq8q4UCjEoEGD0LVrV8THx0NRURGWlpZQUlJCs2bN4OHhgefPn+PYsWPQ1tauNr+Ojg569eol/V4sFuP48eMYOHAgpkyZggMHDkhDlR49euCrr77Ct99+i169esns71VZJebq6oozZ85g+PDh0NPTk3vKoZKSEjQ0NAAAhoaGaNq0qbSx/v79+yEWi6uEOnl5eTX2TVNVVZVWiPn5+UEoFFarkMrPz8fJkyfx5ZdfVqm4unv3rty5nZ2d0bZtW5ljRUVFGD58OEQiEU6dOoW1a9di69at2Llzp9wwLCMjAydPnkRSUhIAwMHBAS4uLvj444/lvjfgZSC4ZMkSbNq0CQMGDMCuXbtgYGCAmJgYpKWlQVtbG1ZWVrX2iCMiInoXBJKa/tQmIiIiIpLh2bNn8PLykjbHFwgEcHJywqBBg6CgoFCnOXx8fODu7g4TExP89ddfsLCwkHlfRUUFrl+/jsDAQBQXF0MoFMLPzw/t2rXDihUrpPeVlZWhW7duSEtLQ3BwcLWArlLv3r2RnJyMkJAQxMbGYv/+/TLvc3FxQZ8+faTfDx8+HPn5+ViwYAE++ugjxMfHV6mUKikpwZo1a+Ru2wwNDcWyZcvQrl07TJs2DYGBgdVO29y/fz8mTpyImJgYmJubS6//9NNPNZ7GOHny5Cr3Ay+rvCZOnIgTJ07g9u3bePToEaZMmYIff/wR33zzjcx5ysrKsHnzZmRnZ1cba9q0KT799NMqQV2loKAgjB8/HtHR0Vi7di1mzZqF3NxcHDlypMqpoVpaWhg9erTcXmevKiwsRGhoKLKysqCrq4tWrVrV2JeOiIjodbBOmYiIiIheS3p6Og4fPlzlpEiJRILg4GBcunSpzvO4ubnhzp07KCoqQqdOneDv7y/zPi8vL9y+fVvamF0sFsPZ2RmxsbHYuXOn9D5FRUUcOnQIaWlpmDt3rtzXXbFiBcLCwnDs2DE0a9YMAwYMqNKXDADs7Ozg5uZW5VplY/3KKq+/9xFTVlZG69at5b7uvXv3qlSIydouuXfvXvTs2bNKuFVcXIyIiAi58wKQeQjBunXrcODAAezatQtpaWmYNm0apk+fjuXLl8ud5/HjxzLDMODlQQNxcXFVrlVUVGDVqlVwdnaGqqoqgoKCMHv2bACoFoYBL7d+Hjp0SG6T/UrPnz/Hb7/9Bm9vb9y9exfnz5/Hb7/9Vu31iYiI3hQDMSIiIiJ6Lffu3ZNbBXX//v1aw45XtWzZEv7+/mjevDl69uyJY8eOVRlPSUnBo0ePZD5rbW2Nn3/+GVeuXJFea9asGTZv3oz9+/fj4MGDMp9zdnbGRx99hO+++w7l5eXo2LEj+vTpg9OnT0NHRweenp4oKyurVunWrl07ZGdnQywWQyQSyWys379/f2noVamyt1dxcTG0tLSQmZmJx48fVwvE4uPj4ePjU62Z/o4dOxAQECDzvQAvt3Pq6+tXu
Xbx4kUsWbIES5cuhY2NDUaOHIkBAwZg8+bNMiu8Kvn4+MgdA1Al4IqNjYWbmxu+/PJLLFy4EHfu3IGtra107O9hWKXCwkI8fPhQ7msUFxfj2LFjKCkpqXK9qKgIx44dq7FSjoiIqK4YiBERERHRa0lJSZE7VlFRgfT09NeaT09PD1euXMGIESMwZswY/PTTT9J+WdHR0TU+6+rqCnd3d4SGhkqvjR8/HuPGjcOsWbPw/PlzAEBWVhYuX76MQ4cO4ezZs/j8888RFRWFffv2AQC6dOkCBQUFXL58GS1btpS5jbKysX5ISAiaNWsmMxBTVFTEwIEDsXXrVpw/fx7W1tb4/PPPERkZKQ3KKhvy/z0QO3jwIFRUVODu7i69VlxcjJ9//hnt27evckpjJVVVVQwZMqRKyBUZGYkxY8ZgwIABmDp1KgYOHAgHBwccOXJEbuN9iUSCFStW4MSJEzLHK6mrq0vX6ujoiJiYGFy/fh0///wzlJSUpPfV9nugpvGwsLBqYVilgoKCWqvliIiI6oKBGBERERG9lspQ5E3Hi4qKUFRUVOWaiooKDhw4gO+//x5ff/01Pv30U5SWltZ4CiQAjB49GpaWlhg0aJC0CTwAbN68GXp6ehg3bhwiIyOxefNm+Pn5ISoqCkFBQbh16xY8PDzwww8/oKSkBAKBALNnz8b58+fRv39/XLp0qVqFk76+PszNzaWN9WUFYpXvLykpCffu3UOLFi2gpqaGqKgo2NjYAHi5XdLAwACWlpbSZyQSCfbt24fhw4dX6X22fft2JCcn45tvvsGxY8dw4cIFhIWFoaysDD169MDs2bNhZGQkvT8nJwdDhw6FkZERNm7ciEGDBkFbWxtnz56V+3MpKyvD9OnT8c0336BLly7Vto9WUlVVRZMmTfDJJ59g/PjxGDJkCB4+fIju3btXu7fyMAJ55PV3q3wPNaltnIiIqC4YiBERERHRa6mpT1ZsbCx++ukn5OfnVxuLiYnB9u3bsXr1aqxevRo7duxAbGysdFwgEOC7777DgQMHcOjQIfTt2xdNmjSpcYufo6Mj/vrrL5SXl2Pw4MEoKCgA8DJwOXToEIKCgnDo0CGUl5dXe9bQ0BAlJSXYsWMHAGDs2LHQ0tJCeno6RCIRDh8+XO2Z9u3b4/79+zUGYoWFhdJf6+joQCKRICoqqlr/sFff1/379/H48WNMmjRJeq2oqAg///wzxo0bh5iYGPz111+oqKiAr68vlixZgp49e1YJnioqKjBu3DgkJSXhyJEjmDRpEvT19bFp0yYoKirKXGtubi4GDx6MvXv34vvvv8eBAwdw69ataveJRCJYWVmhffv28Pb2xuHDh7F//36Zp4P6+/tj4sSJcoMrBQUFODo6yhwrKipCUFCQzLFKenp6NY4TERHVBQMxIiIiInotNjY26NixY7XrWlpaaN68OTZt2oSWLVvi5MmT0q2PCQkJ2L9/PxITE6X3v3jxAvv370dCQkKVecaNG4erV68iNDQUvXv3lhuePHjwAOnp6WjatCnOnTuHJ0+e4OOPP5b2N3N2dsby5ctrDNTGjh2LFStWoLCwEOrq6pg8eTIOHDiAgQMHytw2WRmIWVtbo6SkBMePH8e2bdtw9OhRREVFAageiKWnpyM7OxvNmzdHeXk5/P39q22X3LdvH4yMjNCrVy/pte3btyM1NRVffvklvvjiC9jb2yMgIADr16+XWWG1fPlyeHt74+DBg/jtt9/Qs2dP9O3bF35+fti4cSPOnTtXpforISEB3bp1w927d7F06VL88ssvMDU1xcGDBzF79mx07twZ9vb2cHFxQW5uLkaNGgUrKys8fPgQY8eOrfb6aWlpmDp1KpydnVFWVobevXtDTU2tyj0ikQgjRoyotv7c3FysWrUKFhYWWL58ucwAs/Lz/HuPNiIiojchkFT+LYWIiIiI6DXExsbi0aNHKCkpQdOmTeHk5ARlZWXExMRgwYIF8PLyQr9+/eDp6Yk7d
+5I+3n9nbW1NcaNG1ft+rNnzzBo0CCkpaVh+/btyMvLQ2ZmJrS1tdGiRQvMmTMHSUlJuHbtGuzs7ODt7Y3Bgwdjzpw52LhxIwAgKCgIZ8+elfserK2tMXnyZPzyyy9YuHAhIiIiYGtri88//xzr169HaGgo7O3tpfdfuXIFffr0gaenJ9LS0qqFbT169ICqqiqcnZ0BAKWlpQgICECXLl0QEhKC8vJytGvXDr6+vtJQrLS0FMbGxpg+fTratm2LgoICNG7cGIsXL4azszM6deqEOXPmwNDQEK1atcKFCxeqve7hw4fxySefYPXq1YiLi5NbRdWzZ0/06NEDDx8+xMCBA6GgoIC+fftix44dmDhxIrZu3QoVFRXp/WFhYRg3bhzCw8Px008/4YsvvoBQWPXf1MvLy/HHH3/gm2++AQCsXLkSM2fOhIKCAoqLi/Ho0SOkp6dDS0sLjo6O0NTUlD6bkZGBjRs34vfff0dhYSEmT56MpUuXQk1NDUeOHKlSZaarq4uxY8dWO0CAiIjoTTAQIyIiIqJ/xdmzZ/HZZ58hKSkJy5Ytk1uppaCggOXLl8scy8rKgru7O27fvo2dO3diwoQJ0rH09HS4ubkhNTUV169fR8uWLfHHH39g1qxZ2LBhA+bPn4+UlBT88ccfctf40UcfYdu2bTh16hSeP38OTU1N9OrVC8XFxYiIiMD06dPxyy+/VFmPiYkJli5dKnfO9u3b46OPPoKKigqKioqwd+9eTJ48GQUFBdi1axcWLlyInJwcafDk5eWFLVu2oHPnzlXmKSsrQ7du3TBu3DiYmJjgyZMnePToUbUKqaCgIHTt2hXu7u5o3rw5oqKiYG1tLXNtampqcHR0xKhRo9CsWTM0adIEV69exerVq/HFF19If0ZisRienp5YsmQJrKyscPDgQTg5OVWb79atW5g7dy4ePXqEadOmYeXKlXUKrJKSkrB27Vr88ccfkEgkmDlzJhYuXAgTExPpPRUVFXj69Cmys7Ohq6sLKyuramEcERHRm+KfKERERET0rxg8eDDCwsKwaNEilJWVyb2vpsb5jRo1woULFzBx4kRMnDgR3377rXQbZuXplI0bN4abmxsiIyPh4eGBRYsW4fPPP4eXlxcMDAykzez/TktLC61atcLy5cuRm5srrSqbNWsW/Pz80Lt3bxw8eLDKNsNGjRqha9euNb7vym2hlf21oqKiYGJiAjU1Nfj6+qJdu3ZVqrBOnDhRLQwDXp5Y6efnh5KSEoSHh2PZsmXVwrCUlBQMHToU9vb26Nq1K7777ju5YRjwcjvnmDFj0K5dO5SUlMDf3x9//fUXFi5cKA3DEhMTMXDgQCxevBifffYZ1qxZg6KiIuTl5UnnSUpKwoQJE9C9e3eoqKjA398f27ZtqzUMi4mJwezZs2FpaYkdO3ZgwYIFiImJwbp166qEYcDLoLRFixbo1KkTmjdvzjCMiIjeKv6pQkRERERylZeXIz09XWaT/LpQU1PDihUrauz7lJaWhvT0dLnjSkpK2L59O1atWoX//e9/+OSTT1BcXAwAaNKkCXx8fKCtrQ1XV1c8ffoUq1atwogRI/DJJ58gMDAQI0aMgL29PV7dGPHixQukp6dDSUkJpqammDVrFn799VdkZWVJT2ksKytDQkICrl+/XmU9zZo1q/E9l5SUAPi/QCwyMrJaQ/1KmZmZNZ6aKBQK4ejoCHNzcyxbtqza67i7u6O8vByfffYZ5syZg5kzZ1YLll5VeTplSEgIKioq4O/vjwEDBkjHT58+DUdHR8THx+O7776Duro6AgICcOnSJfz22294+PAh1q5dCxsbG1y4cAE7duzAnTt30KFDhxo/k4iICEyaNAnW1tY4ceIEvvvuO8TGxmLFihXSEC0rKwuxsbGv9XutvLwcUVFRCA0NRXZ2dp2fIyIiYiBGRERERNVIJBLcunULa9euxaZNm7B27Vrs27cPGRkZbzTfsGHDqpyIWElBQQHHjx+Hn
Z0djh8/DnndPAQCAZYsWYITJ07gzJkz0q2SAGBgYAAfHx9oaGjA1dUVMTEx2L9/PxwdHfHRRx8hOTkZI0eOxMyZM3Ht2jX4+PjA2NgYq1atQnBwMABg2bJlKCsrw6+//gpFRUVMnz4dly5dQrNmzao117e0tKzxvSooKAB4WU0GvKwQs7GxQUJCAuLi4qoEYkePHoWSklKN82VlZcHT07NKVZlEIsHcuXMREBCAn376CR4eHhg0aBA8PT1ha2srd66cnBycO3cO7du3h7+/P1q2bAkAyMvLw9SpUzFixAh0794d06dPr1bVV15ejhMnTmDFihWYPHkyIiMjMXXqVJmVWxKJBMXFxQgMDMTIkSNhZ2eHq1evYu3atYiJicGXX34pDQxzcnKwb98+/Pbbb9izZw/WrVuHU6dOSYNFeSIiIrBu3TocOnQIJ0+exMaNG+Hl5SU9VIGIiKgm7CFGRERERNVcv34dN27cqHZdU1MTHh4e1U4PrIu8vDzcvn0bkZGRyMjIQGBgIFJTU/H111/jwIEDOHXqFEaMGIFNmzbB0NBQ7jy3bt3C1KlTUVFRgbNnz8LOzg7Ay61+PXv2RElJCW7cuAE1NTU4OztDVVUVvr6+0NHRQXBwMDp16oS5c+fi6tWrEAqFuHfvHhQVFbFs2TJ4enri+fPnKC0thYWFBfr3748bN24gJSVF+p6vXbuGbdu2ScOkVxkbG6OsrAxz585F//79ce7cOWhqauL777+Hubk5Ro8ejcTERBgZGQEAOnfuDDs7O5iZmcl8r2KxGFFRUTh06FCV65s2bcLcuXPxyy+/YN26dWjWrBmuXr2K0tJS9OvXD+bm5lUOAwBenuS4adMmeHh44Ndff5VuVb1z5w7Gjx+P1NRU/Pbbb+jevTsOHDgg9/O3tbXF6NGj5Y6HhobC29sbhYWFqKioQHx8PDp27IhPP/0UysrKVe6tbMgvK2ht3rw5PvnkE5mvkZycjO3bt1fZzlrJ2dkZ/fr1k7s+IiIigBViRERERPQ3JSUl8PPzkzmWl5eH+/fvv9G8mpqaGDBgAObPn48ff/wRP/30E5SUlDBixAhoaGhg+/btuH37Nuzs7LBv375q1WKpqanYu3cvfHx8MG7cOIwaNQoeHh64fPkygJdhlI+PDxQVFeHq6oqioiKcP38eSUlJGDlyJEpLS9GmTRv88ssvWL9+PaZPny7dAggAS5YsgYKCAlatWoWmTZtiyJAhiIyMRH5+Pry8vKTraNu2LU6ePImCggKUl5cDeFkRZWdnh3HjxqGoqAhCoRA6OjpITk5GQUEBmjdvDj8/P1haWkrDsCdPnuDu3bvo06cPtLS0ZH5mYWFhWL16dZVr165dw/z58zFjxgzs2LED2traOHv2LIqLi9GrVy88ffoUS5cuxYABA/DkyRM8ePAA/v7+8PT0xKZNm7BhwwaIRCKUlZXhu+++Q9euXWFgYIAHDx5gypQpyM3NrfHnqKqqKvO6RCLBwYMHcfLkSRQWFgJ4WS1nYWGB4uJimZVb4eHhcqsOo6KikJSUJHPM399fZhgGAIGBgbVWlxERETEQIyIiIqIqkpKSamyCHxcXV+sceXl5iIqKQnx8vNxtkE5OTrh9+zZ27NiBc+fOYfHixVi0aBH69++PSZMmYdCgQYiPjwfwsrppz549iImJkT6vqqqKXr164dtvv8X27dsBAE2bNsW1a9cgEAjg5uYGdXV1nD59Gjdv3oSHhwckEgkWLFiAvn37YsWKFZg1axa+//57PHnyBLq6uvjiiy+wadMmvHjxArNmzUJUVBQcHByqbJvU1taGpaUlYmNjsWbNGty5cwePHj3CqFGjoKamhsLCQggEAmhrayMyMhIApIHYq9sl9+/fD21tbQwbNgxDhw6t8rmWl5fj3r176Ny5M5o2bSq9Hh0djVGjRqF79+4IDg5GXl4eLly4AIlEAjc3N8TFxcHHxwcAMHToUNy8eRMBAQEIDAzE1atXMXXqV
ADA06dP0a1bN6xcuRLfffcdbt68CSsrKwCAurp6jT9bPT29Kt+LxWKcOXMGzs7O0i2of5ebm4t79+5Vu/7ixYsaX0veeOV2WVnKy8uRlZVV47xEREQMxIiIiIioitp6WsXFxcltBF9eXo4///wT69evx6FDh7Br1y78/vvv0mDr74RCIaZOnYonT55g1KhRWLJkCaKiorB27VqEhITA3t4eW7duxd27d1FUVCRzjr59+2LWrFlYvHgxxGIxTE1Nce3aNVRUVMDV1RXW1tbYtWsXdu/ejZUrV0IoFGLv3r0Qi8V49uwZTE1NMW3aNIjFYnz++edQV1fHypUr0atXLzRv3hwqKiq4dOkSUlJSpK/Zvn17xMTEoKSkBCUlJUhISJCOVVZH6ejoICoqCgKBAMbGxggKCpIGYmKxGPv378eYMWOgoqKCnTt3YteuXYiKisKJEyewa9cuxMfH47PPPpPOm5+fj6FDh0JHRwcikQjh4eE4d+4cNDQ04ObmhqSkJFy7dg3x8fHo3r07NDU1kZWVBT09PQQGBqJr166QSCTYsWMHnJyckJGRAV9fX3z77bcQiUSQSCT4448/4OTkJD0p8++UlZXRunVr6c/64MGDcHR0xPDhw6GnpwdNTU15v20QHR1d7Xt/f3+59wNVq9EqKipw69YtzJ8/H3fu3KnxudpCPSIiIgZiRERERFSFkZGRtCG8LIcPH4axsTGmTp0Kf3//KhVgFy5cQHBwcJVrWVlZOHDgQI2nKTZu3Bjbtm2Dn58fysvLpZViw4YNg4eHBy5evCj3WaFQiNWrV2PdunVwd3dHQUEBzM3Nce3aNZSWlsLV1RW9evXCjz/+iG+++QaHDh2CoaEhdu/ejQsXLmDgwIG4ffs2/vjjD2hpaWHp0qXYsWMHYmNj4eHhgQcPHkBBQQGHDx+Wvma7du3w6NEj6OvrQywWVwmQCgsLIZFIpIGYubk5Hj16hPLycmkgdvPmTcTFxWHixInIzc3F+vXrIRQKoaamhsjISCQmJmLLli1QVFQE8DJAmzRpEqKjo+Hk5IRr167hxIkTMDY2Rs+ePZGeno7r16/j1q1bGDp0KMzNzREVFYUBAwbA19cXZmZmSEtLw/DhwzF9+nR88skn0n5qAPDo0SM0b94cs2bNgkQiga2tLQwMDKp8zurq6vj444+hoKCA7du3o2XLlhg/fjzMzMxw+/ZtbNmyRe7PqPLnVFhYiP3798PV1RXNmjXDnj175FYQKisrw9LSEleuXMGsWbNgYmKC7t2748SJEzX+/rS2tq4xmCMiIgIYiBERERHR3wgEAgwePFjadP1VLVu2hLe3N7766itcvXoVzs7OcHJywubNm5GUlCR3y1xpaSkCAgJqfe3OnTsjICAAGzZswPHjx3HhwgUsWbIE+fn5NT43efJkeHl54fLly+jRowcSExNhaWmJa9euoaioCK6urpg2bRomT56MKVOm4ObNmxg0aBDmzZsHb29vzJkzB2fOnEFwcDDmzJkDXV1d/Pjjj5g8eTJEIhGsra2rbJts3749ioqK0LRpUxQVFSE5OVna06qgoAAVFRXSLZOV2yU1NDTQqlUrAMC+fftgZWUFFxcXbNiwAUVFRRg0aBD27NkDoVCITz/9FF26dJG+3v/+9z+cOnUKgwcPxsmTJ7Fjxw60atUKPXv2RG5uLq5du4bdu3dj9uzZsLKywuPHj/H999/j2LFjUFdXh7e3N1q1agVfX1+cOXMG27Ztg4aGBgoKCjBq1Cg4Ojri2bNncHd3R1paGhYsWICZM2diwoQJ6N+/P0aNGoUZM2bgzJkzsLKywsyZM9GmTRsEBQXhjz/+gJeXF1q3bo309HS5P6MHDx7A0NAQEydOhEAgwL59+xAeHo7evXtXu1cgECAhIQFmZmbo06cPvL29MX78ePj5+SE+Ph6//vorOnbsWO05XV1dfPTRRzX/JiMiIgJPmSQiIiIiOdLS0nDnzh0kJiZCVVUVrVq1g
pOTE4TCl/+mWlFRgcuXL2Pr1q04e/YsmjdvjrFjx8qdz8rKCuPHj6/z6yclJWHhwoU4fPgwRo8eLT1N8u9KSkqwbNkyaGlpISQkRBqI/PXXX2jdujWioqLQs2dPaGtr49KlS5g0aRKCg4Nx69YtBAUF4fnz51Xm69ChA6KiovDFF18gPDwcv/zyC86dO4e0tDSEhYXBzs4OeXl50NbWRteuXZGUlISnT58iJSUFTZo0wZgxY3Ds2DEcPnwYK1asQM+ePREfH4/CwkJcvnwZBQUFMDQ0lPZMa9KkCYqKitC7d2/cvn0bysrKiIyMlPbqOn36NEaMGIFhw4bhzJkzWLlyJcaPHw83NzeUlZXB29sbP/zwA44fPw4TExNkZmZi3759cHd3R1FREZYsWQJPT08MGDAAu3btgqGhISQSCTZv3oxFixahuLgYVlZWOHnypHQ75KtycnKwadMmrF+/HllZWRg/fjyWLFmCzMxMbNy4EadOnYKmpiamT5+O4cOH4+rVq9Ua3r948QJXrlzBxIkTMWnSJFhaWlYZj4qKgre3N168eIHIyEjpqaAjR46Eu7s72rZtC4FAUG1tcXFxCAsLQ2lpKZo2bQpHR0dpVR0REVFNGIgRERER0T+WmJiIXbt2yTxJsJJIJMLs2bNr3O4mi4+PD+bOnYtOnTrBwsKiyphAIMCBAwcgkUhw6NAhdOjQAYmJiRg8eDAiIyNx5MgRDBo0CE+ePEHPnj3RuHFjnDlzBkOHDoW9vT3s7e1lvuaAAQMwcuRIuLi4YOHChejYsSM0NDQwd+5c/PzzzwAAOzs76Ojo4OHDhygoKMCDBw/QunVr9O/fHxcvXsS5c+fg7u6On3/+GStXrsTs2bPxww8/4ODBgxg/fjyePXuGEydOYOnSpXBzc5M2w9++fTumTZsG4OVWxs6dO6NNmzbw8/PDjBkzsGTJEri5uQEATp06hXnz5iEgIACqqqpo3LgxPD09YWpqitzcXEybNg3R0dH49ddfMXv2bAgEAgQHB8Pd3R3R0dFQVVXF+vXrMWPGjGqBU3p6OjZs2ABPT08UFxfj008/xfz583Hv3j1s2LABQUFBaNGiBT777DNMnDgRGhoaKC0txdGjR3H//n2IRCKUlpYCAAYNGoQ+ffpIw1TgZU+08+fP4+TJkzh37hwKCgrg4OAAd3d3uLu7w8HBQWYI9iqxWIySkhKoqKjUei8REdGrGIgRERER0VshkUiwbds2JCcnyxw/dOgQEhISMGHCBMybN09uGCVLSUkJ1q5dC29vbzg4OEBbWxthYWHIycnB4sWL8b///Q/BwcFYuXIlFi1ahKKiIowfP17a4P+zzz5DREQEevbsiSZNmmD37t04efIklJWVZb5ekyZNIBKJMGPGDISEhODTTz9FWloaJBIJYmJiIBQKMXHiRNy9exdRUVEAgPPnz2PAgAHo0qUL/Pz8cPr0aQwfPhzbtm3DjBkzcOHCBfTr1w/9+vVDUVERLl++DH19fRQUFMDa2hrx8fFo3bo1fH19IRQKkZ6ejo4dO0IkEiEhIQF9+/bF6tWr0adPHygqKmL37t2YOnUqkpOTUVRUhOHDh8PJyUl6QqhYLEZ0dDSWLl0Ke3t75OfnY8KECThz5gwEAgHGjBmDnTt3Qk1Nrcp7f/HiBX799Vds27YNAoEAHh4emDhxIk6fPo0tW7YgJSUF/fr1k57WKRQKERISgt27d+PgwYPSdX/66acYM2YMdHR0pHPn5OTg7NmzOHHiBC5evIji4mK0bdtWGoK1aNGiTr8fSktL4ePjg+DgYJSWlkJDQwOdOnWCi4tLldCNiIhIHv5pQURERERvhUAgwLBhw6oFLABw9+5diEQiuLm54cyZM3BwcECvXr3g5eVVY1VZJWVlZXz11VfYv38/EhMTsWrVKmRkZCA5ORkjRoxA3759sWDBAixduhT9+vVDbm4uTp48iS+++ALz58/HvHnzYG1tDR8fHyQnJ2PWrFlyw
zDgZcXb5MmTYWVlhW+//RazZ89GXFwc4uPjcePGDQD/d9JkpaSkJACQ9jvLyMgAAGRmZkIgEKBTp07SrYPjx4/Hli1bkJ+fj7Zt2yIyMhLFxcX4448/IBQKUVZWhtGjRyM7OxsZGRlwcnLCDz/8ADc3N6ioqEgPEEhPT0deXh5mzJgBe3t7aRgGvGxib2VlJa300tPTw5kzZ9CiRQuEhYXh8OHDVX5Wz58/x8yZM6XN7hctWoRTp06htLQU33//Pe7evYtRo0YhPDwcFy5cQMeOHbF582a0a9cOTk5OOHz4MCZPnozQ0FD4+/tj5syZ0NHRQUZGBnbt2oWBAwdCX18fEyZMQEpKCv73v//h+fPnuH//Pr766qs6h2ESiQSHDx+Gv7+/tAItPz8fV69exYULF+o0BxEREQMxIiIiInprDAwMMHv2bLi5uaFly5ZwcnLC+PHjMX/+fNjY2MDb2xt5eXno2bMnUlNTMWzYMFhbW+PXX39FVlZWrfNbWFjAy8sLXl5eSExMRFxcHLp06YJffvkFFy5cgKenJ0JDQ+Ho6Ahvb2+sWbMGW7duxZYtWzBkyBA0bdoUPj4+iI6ORklJidzXefHiBby9vfH999/Dy8sL1tbW0NbWho6OjrS5frt27VBWVgaBQABNTU3pSZMFBQUAXvZgE4lEePr0Kezt7aGjo4MDBw5gzJgxSE5ORk5ODr744gtoampCQUEB8+fPl/bwWrhwIW7dugVVVVXo6elh7dq1GDBggPQUzDFjxqCsrAw5OTn47bff0LVrV7nv5cKFC1i4cCFEIhF27tyJiIgI2NraSsfDwsIwYcIE2NjY4MyZM/j+++/x22+/ISAgAFeuXEHjxo3RunVrODs7Q09PD/7+/hgzZgyMjIzw+eefw9TUFF5eXkhISMCaNWtgb2+P5ORkbNmyBb1794aBgQGmTZuGgoICrF27FvHx8bhz5w4WLVpUrZdYXURFRVUJIl8VEBBQp99HRERE3DJJRERERO9MYmIidu/eje3btyM2NhbW1tbQ1dVFcHAwFBUVMX78eMybNw8ODg61zlVYWIiVK1dizZo1MDQ0hJKSEqKjozFr1iw8e/YMFy5cwPz587Fq1SrcvHkTo0aNgpmZGf766y9kZWVh2bJlcHZ2ljm3r68vwsLC8OjRI3Tt2hWmpqawtbXFtm3bIBKJkJycDIlEAi0tLTRq1AgKCgpwd3fH5s2bYWJigsTERMybNw8XLlyAsrIyXFxcsHHjRnzzzTfQ0NCo9nr+/v44cuQItLS0sHPnTkybNg3m5uYoLi7Gnj17MGnSJDRp0gSjR4/Gd999Bw0NDQiFQpw4cQLt27fHmjVroKSkJPezio2NxZYtW6pUxd2/fx8rV67E6dOnYWpqijlz5kAsFmPr1q2Ii4vDokWLoK6uXm0uiUSCS5cuYeTIkRg/fjyaNGkCAIiPj8epU6dw8uRJ3L59G0KhEK6urhg5ciSGDRsGAwODWn+mdXHhwgX4+/vLHR8yZAjatGnzVl6LiIg+XAzEiIiIiKhOMjIy4O/vj6SkJKipqaF169awtbV9o2bmlSdU7ty5E/n5+bCyspKernj16lV07twZ8+bNw+DBg6GgoFDjXBEREZgzZw58fHzg4OCAyMhImJmZYdCgQdiyZQtsbW1x+PBhSCQSDBo0CEVFRTh79iyEQiFWr15d5fRKiUSCmzdv4saNG1BVVcW4cePQp08fjBo1Cvv378eECRMAAIcPH8bYsWPRqlUrZGdno7S0FC4uLjh9+jQaN26M7Oxs9O/fH2VlZbhy5Qp2794NNTU1hIeHy3wPAoEAS5cuRWBgIFxdXWFoaIisrCxs374d8+bNg7GxMTp16oTt27dDWVkZFhYWOHv2LF68eIGJEydiwIABMDY2ljm3UCjEsmXLpKcv3rp1CytXrsTFixfRvHlzTJo0CXFxcThw4ADKy8sxduxYjB8/Hn5+fnI/c2dnZ/Tr1w/Pnz/HyZMnceLEC
dy7dw+Kioro27cv3N3dMWTIEDRu3LjGn92rsrKykJ+fj8aNG1fZyllSUoLg4GD4+fnB19cXYrEYTk5OcucZNmyYzNMyiYiIXsVAjIiIiIhqFR0djUOHDqG8vLzK9TZt2mDw4MFvFIplZ2dLA7FX5ebmwtvbG48fP4aFhQXmzJmDqVOn1ng6pUQiwdGjR/H5558jNzcX+vr6iI2NxdixYxEUFIT4+Hhs3LgRQ4YMwbBhwxASEoIDBw7A3Nwco0ePRvv27aGqqoqLFy9CS0sLsbGx0NTURHp6Oq5cuYLFixdDQ0MDIpEIgYGB6NatG86dO4cpU6bg/PnzKC0thY2NDfz9/aGhoQGJRIKmTZvC3t4ep0+fRmRkJPbs2VNjFVf//v0xdOhQAC9PeNy4cSO+/fZbmJubw8jICN7e3gBenoC5e/du/Prrr/j111/h5OQEAwMDudVujo6OGDZsGC5evIiVK1fi9u3baNWqFQYOHIgHDx7g4sWLaNKkCWbNmoWZM2fi6dOnOHLkiLTySxaBQIDTp0/jwYMHUFFRwYABA+Du7o6PPvoI2tracp+TJSsrC3/++ad0G6RQKISOjg4SEhLg6+uLwMBA6UmSHTt2RNeuXeV+jgoKCvj8889lVrYRERG9ij3EiIiIiKhGYrEYXl5e1cIwAAgODsazZ8/eaN4rV65UC8MAQEtLC82aNYOKigpEIhG+/PJLGBsbY+bMmQgNDZU5l0AgwNixYxEREYHp06cjPj4ehoaGOH36NIqLi+Hm5oYZM2Zg7ty5OHnyJIYMGQJ3d3dcvXoVhw8fxoULF/DkyRMYGBggJycHioqKyMzMhIGBAWbOnImvv/4at27dQpcuXZCXl4eLFy8iNTUV7du3R3p6OnJzc6VN9UtLS6Gmpobnz5+juLgYenp6MDc3R2xsbI2fx/Lly1FcXIyUlBQsX74cy5cvh7m5OQDg0qVLkEgkWLRoEX755Rf069cP69evh4uLC4KDg3H37l2EhIRUm9PAwABlZWXo0KEDBgwYgJKSEnh4eKC0tBSrVq1Camoq9u7dCz8/PygpKcHFxQXdu3fHyZMna1zrjRs30KJFCxw/fhzp6ek4deoUxo0b99phWHFxMXbu3FmlJ5hYLEZmZiaSkpJgbGyMVatW4d69e8jJycGNGzewYsUKuRVgbm5uDMOIiKhOWCFGRERERDWKi4vD7t275Y47OTlJK5vqSiwW46effpJ7wqSqqirKysqkvcYMDAxQVFSE3NxcuLq64rPPPqtxO+WDBw8wa9Ys3L17F4aGhkhOToabmxvu378PbW1t7N+/H1euXMH//vc/TJ06FVOmTMGAAQNgb2+P+Ph4KCoqSkMaRUVFfPbZZ/D19UV5eTni4uKQnp6O9evXo1OnTtLKLJFIhOLiYohEIpiYmODFixdo06YNTE1NMX36dHz55ZcYOXKkzPWWlZXh119/lQZWhw4dQrNmzZCeno6UlBRIJBLs2LEDubm5WLJkCfT19VFaWoqMjAxIJBJUVFTA0dERW7ZsQXZ2NsrKyvDixQts3rwZYWFh6Ny5M0xNTXHx4kXk5eVh+PDhmDVrFtLT07F9+3b4+PhAIBBALBZDRUUFo0ePRosWLaqcWllJQUEBs2bNeq3tkJXy8/Ph7+8PPz8/+Pn5IS8vD3369JF7/9y5c2W+jlgsxv379xEUFCStCOzUqVOVwwKIiIhqwkCMiIiIiGoUFRWFQ4cOyR0vLCyEra0tunXrhmbNmtVp+2RFRQVWrFghd1wgEMDDwwONGzfG5cuXsXXrVpw9exYikQja2tpITU2Fubm5dDulrq5utTnEYjF27dqFpUuXori4GBUVFdDS0oK+vj4iIiLw7bffwszMDDNnzkS3bt2wePFijBo1Cra2toiIiICenh5iYmIgEolQUVGB33//HXPmzMGYMWNw4sQJODo6wtfXF5qamtJgLy4uDmZmZjAzM0NcXBzU1NTw7bff4v79+4iIiECHDh1gZ
mZWba2HDh1CVFQUhg0bhkuXLsHKygrR0dEoKipCo0aNsGvXLnh6euLixYto0aIFnjx5AmVlZZSUlEBLSwvbt2/H6NGjUVJSgj179mDVqlWIjo6Gi4sLlJSUcPPmTWhpaWHatGno2bMnzp07h/379yM/P1/683J1dUXXrl2RmZmJs2fPIjU1FWPHjpVWqQGAtrY23N3dYWpqWuvPWCKRIC4uDr6+vtIALCQkBGKxGDo6OnB2doaTkxNUVFTkzjF8+HA4OjrW+lpERESvi4EYEREREdUoPz8f69evh1gsljn+8OFDnD59GhKJBIaGhujatav0q3Xr1hCJRDKf27lzJxISEmSOPXnyBEePHoWrqytGjx6N4cOHo6ysrMoJlY0aNUJubi4UFRUxYcIEzJs3D61atao2V3p6OpYtW4adO3dCW1sbOTk5aNWqFcLCwuDi4oLPPvsMHh4e0NfXxw8//ICpU6fCxsYGISEhaNKkCVJSUqCqqopmzZqhcePGSElJwZMnTyCRSPD48WOMGTMGoaGhEIvFuHHjBnr06AELCwskJiaitLQU586dw4gRIzB27Fjs378f7du3R+vWrdGsWTOoqalh9erVSElJgbOzM4KCgmBhYYHIyEiIxWK0atUKc+bMwbJly1BRUQGxWIyioiKUl5dDKBRi5syZ+P3331FcXIytW7di7dq1SEpKQocOHZCbm4uIiAi0aNECU6dOhVgsxs6dOxEVFQWhUAixWAwbGxu4ubmhuLgYFy9eRFJSEgwNDTFixAi4u7uje/fuyMjIQEpKCtTV1WFpaQmhUHbXlVeb39+5cwd+fn5ITEwEANjY2KBVq1bQ1dVFWVkZnj9/jqCgILi4uMDFxUXu772PP/4YNjY2csdflZWVhZSUFGhoaMDExOSN+toREVHDwUCMiIiIiGrl7e2Ne/fuVbuuqamJWbNmobi4GHfu3MHt27dx+/Zt3Lt3DyUlJVBXV0fnzp2lAVmnTp2goaEBAHj+/DkOHDiAv/91VEFBASNGjICvry+OHTsm3c7n5uaG0aNHY8iQIQgKCpJWjQmFQigqKqKwsBA9e/bE/PnzZW6n9PPzw+zZs/Hw4UMoKSlBWVkZSkpKKC8vx48//ojff/8dWVlZ+P7777FkyRJYWFggPDwcampqKCwshEAgwNSpU7Fjxw60b98eQUFBWLp0KVJTU3HgwAGUlJRg3759mDhxIszMzFBWVoa0tDSsW7cOCxYsgKGhoTQgGjhwINauXYsOHTqgtLQUVlZWeP78OUxNTfH06VMAwIgRI6CpqYm9e/eiSZMmSE1NhUAggEQiQdu2bXHmzBloamrC09MTGzZsQE5ODhwcHBAfH4+MjAz069cPLi4uuHPnDi5duiQNNBs1aoSePXtCIBDg5s2bSE9Ph6mpKdzd3eHu7g4XFxe5oder0tPTcefOHfj6+uLFixfQ19eHtra2tJ9aUVERFBQUkJaWhocPHyI7OxsAYGFhgfbt26NDhw5o2bIlgoODZc6vpqaGzz//XG6gWqmkpAR//vlnlRM89fT0MGLECBgZGdX6PoiIqGFiIEZEREREtRKLxbh69SoCAgKkfaXMzc0xePBgmT2eSkpKcP/+fWlAdvv2bWRlZUFBQQFt2rSRBmSmpqYIDAxEWloaAMDIyAj9+vWrsk0vNTUVp0+fxrFjx3D9+nUIBAL07t0bo0aNgrOzM86cOSOtGqsMr0xNTTFv3rxq2ynLy8uxadMmLF++HGVlZSgpKYGRkRGSkpIwceJEPH/+HPfu3cOiRYuwfv16GBkZ4fnz59IgSiQSoXPnzoiOjkZCQgIMDAzw7bffYs6cOQCAlStX4uuvv4ahoSFEIhGMjY0hEomknwfwsidZcHAwhg4diri4OOjr6yM9PR36+vp48eIFAGDmzJm4dOkSXrx4Ie0RVrnVcPfu3XBxccH69euxadMmlJSUwNLSEs+fP4eioiKGDh0KRUVFeHl5IScnR/qaHTp0gKqqK
gIDA5GTkwMrKytpCNahQ4caK6rEYjEiIiLg5+cn3QIZGRkJABgzZozM3l0PHz6Ev7+/NPxq37492rVrBz09vSr3+fj44NatW1WuKSgoYPTo0XWqDjt27BgeP35c7bqamhrmzJkDNTW1WucgIqKGh4EYEREREdVZSUkJMjIyoKamBh0dnTo/VxmovBqQRUdHAwCsra3Ro0cPdOzYET169ICNjY3ccCYlJQWnTp3CsWPHcOPGDSgoKKB3794YMmQIioqKkJmZifz8fDx+/Bi3bt2CWCzGhAkT8Nlnn1XZTpmYmIiFCxfiyJEjUFRUhIKCAsRiMSwtLWFjY4OzZ89i4sSJOHbsGHR1dZGSkoKKigooKCigRYsWCA8Ph66uLjIzM/HHH3/Aw8MDQqEQHh4e2Lx5MzQ0NKCgoIDhw4djz549aNy4MTIyMgC8DM2uX78OHx8fqKuro7CwENra2sjIyICysjKGDx+OY8eOQUVFBYWFhQAAoVCIuXPnYuHChVi7di22b98OiUQCPT09JCQkwNTUFB06dEBoaKg0qKr8bHV1dREWFoaCggLY2trC3d0dI0eOhKOjo9zPuaCgAPfu3ZMGYHfu3EF2djaEQiEsLS3RuHFjlJeXo6ioCGPGjJH7c58+fTqMjY1r/f3x7NkzPHjwAHl5eWjSpAk6dOgAfX39Wp/LyMiAp6en3PHevXujS5cutc5DREQNDwMxIiIiIqoXL168gK+vrzQgq2y4rq+vjy5dukiryNq2bQtFRcVqzycnJ+PkyZO4dOkSHB0dq22tqwyrxGIxysvL0bVrV3zxxRcYMmSIdDvllStX4OHhgefPn0MikUBDQwOlpaXo06cPzp07Bzc3N/j6+kJNTQ35+fnS6jhHR0ckJCQgMzNT2mS/oqICQ4cOhZeXl3QNI0eOxPnz56XBlomJCUaNGoUNGzZAUVFReqpjYWEhGjduDAMDA4SFhUkr0gCgU6dOWLt2Lfbs2YO9e/dCJBJBSUlJ2gtNUVERISEh0sb+urq6MDQ0xPPnz1FcXIzWrVtj5MiRcHd3l3sKY1xcnLTxva+vr3Q+dXV1mJiYQCQSIT09HampqQBebrusrPiqqSm+m5sbunXrVqffD7WRSCTIyclBXFyc9CshIQHKyspyn3FwcIC7u/tbeX0iIvqwMBAjIiIiojcmkUgQHh6OR48eobCwEEZGRujUqZPMUx9rk5OTg7t370oDMn9/fxQVFUFVVRXOzs7SgMzZ2RlaWlrS19+6dStSUlJkzllSUoIHDx7g4sWLAF5WqhkaGmLBggWYPn06dHV1UVJSgrVr1+KHH35AeXk5BAIBKioq0KZNG4SHh8PKygpRUVFQVFRESUkJKioqoKioiLKyMmn/MjMzM0RFRaF169YICQmRvr6pqSkyMzNRUFAAAPj666+xcuVKaWWWSCRCWVkZzM3NkZycjIqKCpSXlwN4GTqtWLECt2/fxtGjR6GioiIN5Jo3b474+Hjk5eUBAJSVldGkSRMkJSWhvLwcHTp0wMiRIzFixAhYW1tL11NcXIyEhATExMQgNDRU2vy+8nADPT09aGhooKCgQLqNVVNTE23atIGlpSW0tLRQVlaGhIQEhIaGSre4ytOrVy907dq1Tj//8vJyvHjxokrgFRcXh9jYWOmvK99v5WfXvn179O/fX+6cnTp1qnGciIgaLgZiRERERPRGJBIJ/vzzTzx48KDKdUVFRXzyySewsLD4R/OXlpYiODi4yjbL9PR0CIVCtG7dWtqkv7IJvSwJCQnYt28fevbsCS0tLdy6dQspKSkQCAQQiUT45JNPsHDhQrRq1QrR0dGYNWuWNDwTiUTQ1NSERCKBsrKydMtjZWClp6eHvLw8lJSUoFu3brh16xYMDAyk4dyr2yQBwMXFBf7+/tJKrsqTHk1MTKS9wyqvjx07Fnl5eTh79ixUVFRQXFwMTU1NqKioSIMqoVAIHR0dZGdnQywWo2vXrnB3d8eIESNgZmYmn
S8jIwN+fn64d+8eBAKBtDouIyMDN27cwIsXL6TrVFFRgZ2dHYyMjKSVazExMYiMjJSu29TUFA4ODnBwcECLFi3knhQKAB4eHjAwMAAAaXXXqwHXq18vXryocpKprq4uzMzMYGZmBnNzc+mvK78MDAwgEAjg6emJrKwsma8/Y8YMNtYnIiKZGIgRERER0RuJjIzE4cOHZY41atQI8+bNq7FR++uSSCSIjIysEpBlZGRg/vz5cp8pLCxEQkICIiMjERgYCGVlZbRt2xaFhYV4+PChdN62bdvi66+/xpAhQ3D+/HlMnz69yqmOjRs3RlFREYqLi6uENpXj5ubmiI2NlYZXAGBpaYm4uDhUVFRAIBBAU1MTubm5Vdb36v0AYG9vDx0dHfj6+kqr0LS0tFBQUCANpFRVVVFUVASBQABXV1e4u7tj+PDhMDIyglgsRmRkpLTxva+vL548eQJXV1f06NGj2udTVlaGu3fvori4GFlZWYiOjkZRURGAl4Feq1atpOGXg4ODdH2vkncCaWFhIUJDQ6WB16vvXSQSwdTUFObm5rC1tYW+vj7U1NRgbGyMNm3awMLCQnoaaW0SEhJw8ODBKp8j8Ha3axIR0YeHgRgRERERvZHTp09LQyVZPv744zqdEvhPJCUlYffu3dKthH/34MEDnDlzBgBga2sLQ0NDJCUlISIiAioqKrC0tERCQoJ0K16jRo0wf/58fPrpp9i4cSPWr18vDcAqtxIKBALpNaFQCIlEAll/pRaJRNJqMn19fWllV+VzrwZrGhoaMDQ0xNOnT6VjlYHYq3OJRCL07t0b7u7uGDp0KNTV1REQEABfX1/pV05ODgQCAVRUVFBSUgIFBQUsWrRIbq+t+/fvIyUlRRp6VYZglRVYAKr17vp7pZeBgQE6dOiARo0aITs7G2FhYcjOzpZb3WVoaAixWIyDBw8iNja2ynoMDQ0xceJEqKqq1vzDf0V+fj6CgoKQkpICdXV1tG7dGiYmJnV+noiIGh4GYkRERET0Rg4fPlzlRMO/27dvH1JTU2FsbAwTExMYGxtX+zIxMZFuz3tTYWFhOHHiRLXrJSUl2LlzJ1JTU6GoqIhGjRqhoKAABQUFUFJSgqWlJYqKihAXFwclJSVoaWkhPT0dAKCgoIBBgwZh8uTJWLFiBYKCggCgSrP7ulJQUJBWd/19DoFAAG1tbWRnZ8sck0gkEIlEGDBgAEaNGoW2bdsiLCwMt2/fho+PDyIiIqSnX4rFYumzurq6UFZWRl5eHjQ0NODh4SF3fcbGxujfv7/crYyxsbHVqruaNm1aLeSqDL5MTU2hqalZ6+dy7do13Lx5U+ZYhw4dMHDgwFrnICIielMMxIiIiIjojdy6dQs+Pj4yxwQCAZo1a4aUlBQkJiYiMTERL168kP7v37e36erqVgnJZAVnBgYG1U6SrBQREYHr168jJSUFQqEQLVq0QJ8+faCpqYmQkBDcvn0bt27dwu3bt6U9xNTV1VFYWAixWAxNTU0oKysjPT0dIpEIAoFAWp1lbW0NV1dXHDx4UHpapLz3/Dp/tX61AuzvlJSUMGjQIHTo0AEKCgq4ceMG/P39q/Qkq1QZJr76mb5anaajo4MFCxbIXUdUVBQOHjwo/V5HR0duZZeZmRmMjIykfcheV3l5OXJzc5GTk4MTJ07I/TyVlZWxdOnSt7rlloiI6FUMxIiIiIjojRQWFmLLli3Iz8+vNubs7Cz39EGJRIKcnJwqIdmrX5XXKk9MrCQQCGBgYFBjcKavrw99fX0oKSnJfe1nz55Je5DdunVLWuX2aohU2dvr1ZBLVVUVJiYmNTbxrwyKXq0IqytFRUV07NgRWlpaiImJQVRUVJX3D1TfavkqgUAALS0taGtrQyQSoaSkBDk5OcjPz8enn35apdH+q7S1tdGiRQtpdVflCZ5/JxaLkZeXh5ycHOTk5CA7O1v667p+X3naJgB8+eWXcrdxAsDy5cvfOHgjIiKqDQMxIiIiI
npjaWlpOHv2LOLj4wG8DHU6dOiAXr16QSgU/qO5xWIx0tPTqwVlfw/PUlNTq1RmKSoqwsjIqFqF2d/DM21tbQgEAqSlpcHX11e6DfHhw4dytzi++hqvVncZGBigd+/esLKygkAgQHR0NK5evVrl9EhZFBQU0KhRIxQXF8sMFmURiURQUlKCQCBAUVFRlYBMW1tbZmVXo0aN8ODBA5SUlFSZS1NTE2pqatKqrZpCrdzcXLkVcCKRCDo6OtDW1pZ+1fb948ePkZmZKXO+Jk2aYNasWXI/A4lEggcPHiAgIABZWVlo1KgROnToACcnJ1aVERFRnTAQIyIiIqJ/LCsrC4WFhdDT06ux6udVubm58PX1RXR0tHSbo7Oz82s1UwdenpQoa2vm34OzrKysKs9Vnmr49+CscePGyM7OxvPnz3Hnzh08fPiwWpD0Kn19fUybNq3a+y4rK8Pu3buRmJj4Wu+n0t+DOKFQCH19fRgaGkJXVxc6OjpQU1ODioqKdCtpUVGR3EBLRUUF7dq1g6mpKUpKShAaGoqIiAgIhUJpZdnrhlqV36uqqr52EBUVFYVDhw7JHBs+fDgcHR3lPnvhwgX4+/tXu965c2f07dv3tdZBREQNEwMxIiIiInrnMjMzsWvXripb6ACgcePG+PTTT6GmpvbWX7OoqAhJSUm1Bmd/X5OOjg50dXWhoKCAnJwcZGRkVKkgGzVqFOzt7WW+5tOnT3HgwIE6rU8oFEJFRQXKysoQiUQQi8UoKytDUVGR3F5jwMsqr9cNsF79XkNDo96qqoKDg3Hp0iVp/zMlJSW4urrC2dlZ7jNpaWnYvHmz3PE5c+ZAT0/vra+ViIg+LLK7khIRERER/YsuXbpULXgCgIyMDNy+ffuNq3wkEgkqKipQWloq90tFRQVNmzZFkyZNYG9vX2WspKQEubm5SEtLQ0ZGBjIyMpCVlSWttFJQUICamhoKCgqkWxWtrKzkrqdZs2Y1NtsXCAQQiUQQiURQVFSs8r9//3p1/NV7Xt2aWvlaleuNi4urMvYufq2kpAQ1NTVUVFRU+xn/PXgTCAQQCATSCrPi4mI8fPgQv/32m9zX0NHRgb6+vszPEwAiIyMZiBERUa0YiBERERHRO1VeXi5tZC/LzZs3sWPHjhpDrZq+3sYGiMo+XX//0tbWljbtFwgEyMnJqfH1xGJxjeMSiQRlZWUoKytDeXk5FBUVpf+rqKiIiooK6RyVX5XPVV5/NTATCoVVAqRXX7suv/57w/7Xeb7yZNFXw6rS0lI8ffoU2dnZr72Wv39uld8rKCjUGIjJO3SAiIjoVQzEiIiIiOidqi0kEovFyM7OhpKSElRUVKClpSUznPonX4qKijWOyTsQoKKiAjdv3sTRo0dx8uRJpKen48WLF7C2tpZ5f2RkZJUG/JWnX5aWlla7t/IeBQUFiEQiKCsrQ1lZGQKBAKWlpcjNzUVubi4KCwvlfnYKCgrSfmBaWlqv/evK/6183dfx559/Ijg4uMo1JSUltGrVCh4eHm+tauvFixfYsWOH3PGaKvaIiIgqMRAjIiIiondKSUkJRkZGSEpKkjnetm1b/PTTT+94VfJVVFTg9u3bOHbsGE6cOIHU1FSYm5tjypQpGDNmDJo1a1Zli1+l4uJihIWFwcnJCQEBATAwMIC5uTliYmKQmpoKJSUl2NnZoUmTJgBeHkzw4sULZGRkIC8vT+ZaNDU1YWVlBRMTE1hYWEhPkNTQ0ICysrK0qX5leFb56+TkZDx58qTKtZKSEqirq6Njx44wMzNDWVkZwsPD8fDhQygoKNQ5PNPS0oK6ujoePXok9/MLCAjAgAED3srPw8TEBPb29ggLC6s25uDgACMjo7fyOkRE9GFjU30iIiIieufknTAoEokwbdo0GBgY1MOq/o9YLIavr680BEtOToapqSlGjx6N0aNHo0OHDtIKqidPnqBbt26YMWMGGjVqBIlEAmtra5iammLUqFEoKirCL7/8Ai8vL
5w5cwZNmjTBmDFjYGBgAD8/P9y4cQP5+fnQ0dGBm5sbevXqhc6dO0MoFCI8PBxBQUEIDw9HdHQ0kpKSatymqampCSMjIzRr1gy2trZo3rw5zM3NYWFhAXNzc6irq0vvTUxMxIEDB1BUVFRlDlVVVaipqSEvL09muPbqr3NyclBeXg5TU1NMnTpV7udpYWGBSZMmvYWfzEsVFRW4desWAgMDUVBQAHV1dbRv3x7dunWDgoLCW3sdIiL6cDEQIyIiIqJ6ERERgStXriAjIwMAYGxsjP79+8PU1LRe1iMWi3H37l0cPXoUJ06cQGJiIkxMTDBq1CiMGTMGHTt2rLaVUiwWo3v37khJSUFISEi10zFTUlLw0UcfISIiAqdOnYK5uTnWrl2LvXv3QiQSYfr06ZgzZw5SUlJw5coVXLlyBXfv3pWGTL1790avXr3Qq1cvGBoaAni53TI2NhbPnj3DkydP8PDhQ0RERCA2NhYpKSkoLy+X+x41NDTQtGlTWFtbw8nJCSKR7A0jAwcORIcOHWr9zCQSCYqLixEfH4/Dhw/Lvc/BwQHu7u61zve6KnuwKSoq1ttJmURE9N/EQIyIiIiI6o1EIkFeXh6EQiE0NDTq5fX9/f1x7NgxHD9+HAkJCTAyMsKoUaMwevRoaaWWPJ6enpg3bx5u3LiB7t27y7wnPz8fo0ePxuXLl7Fz505MnDgRKSkp8PT0xKZNm5Cbm4sxY8Zg8eLFcHJyQl5eHm7duiUNyCq3Ijo4OKB3797o3bs3unfvDk1NzWqvJRaLkZSUhGfPnuHZs2eIiopCWFgYoqKiEB8fj/z8fACAuro6Fi9eLPd95eXloaSkRFpZVlllpqenJzd42rNnD2JjY2WOjR8/vs69vQoLC1FRUQENDQ2GXERE9K9hIEZEREREH4zs7GzcuHEDkZGREIvFsLa2Ro8ePao0dJdIJAgMDMTRo0dx/PhxxMXFwdDQECNHjsTo0aPRpUuXGkOwSjExMXBwcMDEiROxefPmGu8tKyuDh4cHdu3ahZUrV+LLL7+EQCBAfn4+du3ahXXr1iE2NhZ9+vTBkiVL0KtXL2kYlJycDB8fH1y9ehWXL19GfHw8RCIROnXqJA3IOnXqBEVFxVrXnJWVhWfPnuHx48d4/vy53PsKCwtx7tw5xMTEoKCgQHpdVVW1yhbMV3/dqFEjXLp0CdnZ2VXm6tKlC3r37l3r2hITE3Hx4kXExcUBAPT09NCrVy+0bNmy1meJiIheFwMxIiIiIvogZGdnY8eOHVUCHABQVlbGlClTkJCQgGPHjuHYsWOIiYlBkyZN4O7ujjFjxqBr166v1XtKIpGgX79+iIiIQGhoKLS0tOr0zI8//ojvv/8es2bNwu+//y59zfLychw/fhxr1qxBcHAwnJycsGTJEowaNarKtkaJRIKnT5/iypUruHr1Knx8fJCVlQUNDQ10795dGpA5ODjUWF1VUVGBdevWyT2xsm3bthg8eDAkEgkyMzMRGxuLmJgYxMbGVvt1VlaW9Dl1dXV069YNFhYWUFNTg5aWFiwtLaXhWdOmTWVu00xPT8f27dtlnr45duxYtGjRotbPl4iI6HUwECMiIiKiD8KZM2cQEhIicywhIQE7duyAnp4e3N3dMXr0aHTv3l1uD63a7NmzB1OmTMH58+df+/TEnTt3YubMmRg0aBAOHz5cpe+YRCLB1atXsWbNGly6dAnm5ub4/PPPMXXqVJlbSisqKhAcHCzdXnn79m2UlJTAwMAAbm5u0oDMzMys2rN37tzBpUuXql1XVFTEjBkzqlTV1SQ3N1dmUFb569TUVOm9CgoKMDExqVZlVlpairS0NJnzN2nSBLNmzarTWoiIiOqKgRgRERERfRDWrFkjt+JJIpGgS5cucHV1feMQrFJSUhLs7OwwePBg7Nu3743mOH/+PEaNGgVHR0ecPXtWZvj04MED/Prrrzhy5Ai0tLQwe/ZszJs3r8YTOIuKiuDn5ycNyO7fv
w+JRILmzZtLwzFXV1fpaZh+fn64desWSkpKALzcpvjRRx/B3Nz8jd6XLIWFhYiLi5MbmCUmJmL+/PnQ0dGRO8fixYurHVhARET0TzAQIyIiIqIPwurVq1FUVCRzTCAQ4Ouvv36tbZGySCQSuLu7w9fXF+Hh4WjcuPEbzxUYGIhBgwZBW1sb3t7ecpvOx8bGYsOGDdi+fTvKy8sxadIkLFy4EDY2NrW+RmZmJq5fvy4NyKKioiAQCNCuXTtpQNaxY0fk5uZCUVER+vr677yRfWlpKTw9PZGXlyf3ni+//BJKSkrvcFVERPShq71bKBERERHRf0BNpxhaWlr+4zAMAE6ePInTp0/D09PzH4VhANC+fXvcuXMHAODi4oKAgACZ95mbm2P9+vWIj4/Hd999By8vL7Rs2RIjRoyQPi+Prq4uRowYgc2bNyMyMhIxMTHYsWMHbGxssGvXLvTu3Rv6+vqYNGkSdu3ahfv376OiouIfva/XpaSkBEdHR7njVlZWDMOIiOitY4UYEREREX0Q0tPTsWPHDun2v0oikQiTJ0+GiYnJP5o/IyMDdnZ26NKlC06ePPnWKqnS09MxePBgPHz4EMePH8fAgQNrvL+4uBgHDhzAmjVrEBkZia5du2Lx4sX46KOP6nQ6ZiWJRILQ0FBpg/7r16+joKAAjRo1gqurq7SCzNrauk7vNScnB9nZ2WjUqFGdDhl4VVFREXbt2oX09PQq11VUVDBlyhQ0adLkteYjIiKqDQMxIiIiIvpgpKam4urVq3j69CkkEgksLS3h5ub2j8MwAJg4cSLOnj2L8PBwGBkZvYXV/p/CwkJ8/PHHOHfuHLZu3YqpU6ciPz8fMTExEAgEsLS0rNZDSywW4+zZs1i9ejX8/PzQsmVLLFq0COPHj4eysvJrr6G0tBT37t2Tbq+8e/cuKioqYGZmht69e6NXr17o1atXtR5mBQUF+PPPPxEZGSm91rJlSwwePPi1+n4VFxfjzp07iIiIQEVFBSwsLNClSxc0atTotd8LERFRbRiIEREREdEHRywWA8BrVUzV5Pz58xg0aBB2796NyZMnv5U5/668vBzz5s3DH3/8ge+++w4KCgrS96GgoABXV1d06dJF5rN+fn5Ys2YNvLy8YGBggPnz58PDw6PGRvW1ycvLw82bN6UBWWhoKACgVatW0uqxrl274siRI0hKSqr2vKmpKaZMmfLOe5IRERHVBQMxIiIiIqIa5ObmwsHBAba2trhw4cK/GvBIJBKsXLlSbh8vd3d3ODg4yH3+yZMnWLt2Lfbt2wdFRUVMnz4dCxYsgJmZmfSe3NxcPHz4ENnZ2dDT04Ojo2OdKrmSk5Ph4+MjDcji4+PRokULfPzxx3KfmThxIiwtLWudm4iI6F1jIEZEREREVIPZs2dj3759CA0NhYWFxb/+ep6ensjIyJA5ZmxsjOnTp9c6R3JyMn7//Xds3rwZ+fn5GDt2LBYvXgwlJSWcPHkS5eXl0ntVVFQwduxYmJub13mNEokEUVFR8PLyQmFhodz7evXqha5du9Z5XiIioneFp0wSEREREclx48YNbNmyBb/88ss7CcPEYrHcMAx42SOtLgwNDbFy5UrExcVhzZo1uHnzJlxcXHDo0KEqYRjwsnfX8ePHUVZWVud1CgQC2NjYoFu3bjXep6KiUuc5iYiI3iUGYkREREREMhQVFWHatGno0qULZs+e/U5eUyAQ1Lh9UVNT87Xm09TUxIIFC/D06VOsWrUKCgoKMu8rKCio0hS/ruzs7OT2aVNQUICtre1rz0lERPQuMBAjIiIiIpLhu+++Q3x8PHbu3PnWmvPXRiAQoE2bNnLHaxqriaKiIpo3b17jPXl5ea89r5aWFgYMGCBzbNCgQVBXV3/tOYmIiN4FUX0vgIiIiIjofRMQEIC1a9di5cqVaNGixTt97R49eiAxMRHR0dFVrtvY2MDFxeWN59XX1/9H4/K0b98eRkZGuH//PrKzs9GoUSPpN
SIiovcVm+oTEREREb2itLQU7du3h6KiIvz9/SESvft/QxaLxYiKisLTp0+l/bqsrKz+0QmXpaWl8PT0lFkJ1qRJE8ycOfOdVcIRERHVNwZiRERERESv+PHHH/G///0PAQEBcHJyqu/lvFUpKSk4evQosrKypNcMDQ0xZswY6Ojo1N/CiIiI3jEGYkRERERE/19oaCjatm2LJUuWYMWKFfW9nH+FWCxGdHQ0cnJy0LhxY5iZmf2jyjMiIqL/IgZiREREREQAKioq4OLigtzcXDx48ADKysr1vSQiIiL6l7CpPhERERERgA0bNiAgIAC+vr4Mw4iIiD5wrBAjIiIiogbv6dOncHR0xIwZM7Bhw4b6Xg4RERH9yxiIEREREVGDJhaL0atXL8TGxuLRo0dQV1ev7yURERHRv4xbJomIiIioQdu+fTuuX7+OK1euMAwjIiJqIFghRkREREQNVkJCAuzs7DB69Gjs2LGjvpdDRERE7wgDMSIiIiJqkCQSCQYPHozg4GCEhYVBR0envpdERERE7wi3TBIRERFRg3To0CGcO3cOXl5eDMOIiIgaGFaIEREREVGDk5qaCjs7O/Tu3RtHjhyp7+UQERHROyas7wUQEREREb1rn332GQDg999/r+eVEBERUX3glkkiIiIialC8vLxw9OhRHDx4EPr6+vW9HCIiIqoH3DJJRERERA1GdnY27Ozs0K5dO/z5558QCAT1vSQiIiKqB9wySUREREQNxqJFi1BQUIAtW7YwDCMiImrAuGWSiIiIiBqEK1euYOfOndi6dSuaNm1a38shIiKiesQtk0RERET0wcvPz0erVq1gaWmJq1evsjqMiIiogWOFGBERERF98L7++mukpKTgypUrDMOIiIiIgRgRERERfdj8/Pzw+++/49dff4WVlVV9L4eIiIjeA9wySUREREQfrOLiYrRp0wba2trw9fWFgoJCfS+JiIiI3gOsECMiIiKiD9b//vc/PHv2DMHBwQzDiIiISEpY3wsgIiIiIvo3PHjwAKtWrcLy5cthb29f38shIiKi9wi3TBIRERHRB6esrAydOnVCeXk5AgMDoaSkVN9LIiIiovcIt0wSERER0Qfn119/RUhICPz9/RmGERERUTWsECMiIiKiD0pERAScnJwwf/58rFq1qr6XQ0RERO8hBmJERERE9MEQi8Xo1q0b0tLSEBISAlVV1fpeEhEREb2HuGWSiIiIiD4YmzZtgp+fH27cuMEwjIiIiORihRgRERERfRBiYmLg4OCASZMmYdOmTfW9HCIiInqPMRAjIiIiov88iUSCvn374smTJwgNDYWWllZ9L4mIiIjeY9wySURERET/eXv27MGVK1fg7e3NMIyIiIhqxQoxIiIiIvpPS0pKgp2dHYYMGYK9e/fW93KIiIjoP4CBGBERERH9Z0kkEowYMQJ+fn54/PgxdHV163tJRERE9B/ALZNERERE9J914sQJnDlzBsePH2cYRkRERHXGCjEiIiIi+k/KyMiAnZ0dunbtipMnT9b3coiIiOg/RFjfCyAiIiIiehMLFixAaWkpNm3aVN9LISIiov8YbpkkIiIiov+c8+fP48CBA9i9ezcMDQ3rezlERET0H8Mtk0RERET0n5Kbmwt7e3vY29vD29sbAoGgvpdERERE/zHcMklERERE/ylLly5FdnY2tm7dyjCMiIiI3gi3TBIRERHRf8b169fxxx9/wNPTE+bm5vW9HCIiIvqP4pZJIiIiIvpPKCwsROvWrWFoaIgbN25AKORmByIiInozrBAjIiIiov+E7777DvHx8fjrr78YhhEREdE/wkCMiIiIiN57AQEBWLduHX766Se0aNGivpdDRERE/3HcMklERERE77XS0lK0a9cOSkpK8Pf3h0jEf9MlIiKif4Z/myAiIiKi99rPP/+MiIgIBAYGMgwjIiKit4LNF4iIiIjovRUaGoqVK1di2bJlaN26dX0vh4iIiD4Q3DJJRERERO+l8
vJyuLi4ID8/H8HBwVBWVq7vJREREdEHgjXnRERERPRe2rhxIwIDA+Hr68swjIiIiN4qVogRERER0Xvn6dOnaNWqFTw8PLB+/fr6Xg4RERF9YBiIEREREdF7RSwWw83NDfHx8Xj48CHU1dXre0lERET0geGWSSIiIiJ6r2zbtg03btzA1atXGYYRERHRv4IVYkRERET03oiPj4e9vT3GjBmD7du31/dyiIiI6APFQIyIiIiI3gsSiQQfffQRHjx4gPDwcGhra9f3koiIiOgDxS2TRERERPReOHjwIM6fP48///yTYRgRERH9q1ghRkRERET1LjU1Fba2tujbty8OHz5c38shIiKiDxwDMSIiIiKqd2PGjIGPjw/Cw8Ohr69f38shIiKiDxy3TBIRERFRvTpz5gyOHTuGQ4cOMQwjIiKid4IVYkRERERUb7KysmBnZ4cOHTrAy8sLAoGgvpdEREREDYCwvhdARERERA3XokWLUFhYiC1btjAMIyIioneGWyaJiIiIqF5cvnwZu3btwrZt22BiYlLfyyEiIqIGhFsmiYiIiOidy8/Ph4ODA5o1a4arV6+yOoyIiIjeKVaIEREREdE799VXXyE1NZVhGBEREdULBmJERERE9E75+vrC09MTa9euhZWVVX0vh4iIiBogbpkkIiIionemuLgYTk5OaNSoEW7fvg0FBYX6XhIRERE1QKwQIyIiIqJ35scff0R0dDROnTrFMIyIiIjqjbC+F0BEREREDUNwcDBWr16N5cuXw87Orr6XQ0RERA0Yt0wSERER0b+urKwMHTt2hFgsRmBgIBQVFet7SURERNSAccskEREREf3r1qxZg0ePHsHf359hGBEREdU7VogRERER0b/q8ePHcHJywueff45ffvmlvpdDRERExECMiIiIiP49FRUV6NatG9LT0xESEgJVVdX6XhIRERERt0wSERER0b9n06ZNuHPnDm7evMkwjIiIiN4brBAjIiIion9FdHQ0HBwcMGXKFHh6etb3coiIiIikGIgRERER0VsnkUjQt29fREZGIjQ0FJqamvW9JCIiIiIpbpkkIiIiordu9+7duHLlCi5cuMAwjIiIiN47rBAjIiIiorcqMTERdnZ2GDZsGPbs2VPfyyEiIiKqhoEYEREREb01EokEw4cPx927dxEeHg5dXd36XhIRERFRNdwySURERERvzfHjx+Hl5YUTJ04wDCMiIqL3FivEiIiIiOitSE9Ph52dHbp3744TJ07U93KIiIiI5BLW9wKIiIiI6MOwYMEClJeXw9PTs76XQkRERFQjbpkkIiIion/s3LlzOHjwIPbs2QNDQ8P6Xg4RERFRjbhlkoiIiIj+kdzcXNjb28PBwQHnz5+HQCCo7yURERER1YhbJomIiIjoH1myZAmys7OxdetWhmFERET0n8Atk0RERET0xq5fv46tW7di06ZNMDMzq+/lEBEREdUJt0wSERER0RspLCyEo6MjjI2Ncf36dQiF3HxARERE/w2sECMiIiKiN/Ltt9/ixYsXOH/+PMMwIiIi+k9hIEZEREREr+3evXtYv349fv75Z9jY2NT3coiIiIheC7dMEhEREdFrKSkpQbt27aCiooK7d+9CJOK/sRIREdF/C//2QkRERESv5eeff8aTJ08QGBjIMIyIiIj+k9jsgYiIiIjq7NGjR1i5ciW+/PJLtG7dur6XQ0RERPRGuGWSiIiIiOqkvLwcnTt3RmFhIYKCgqCsrFzfSyIiIiJ6I6xxJyIiIqI62bBhA+7fvw8/Pz+GYURERPSfxgoxIiIiIqpVVFQUHB0dMWvWLKxbt66+l0NERET0jzAQIyIiIqIaicViuLq6IiEhAY8ePYKamlp9L4mIiIjoH+GWSSIiIiKq0datW3Hz5k34+PgwDCMiIqIPAivEiIiIiEiuuLg42Nvb4+OPP8a2bdvqezlEREREbwUDMSIiIiKSSSKRYNCgQXj48CHCwsKgra1d30siIiIie
iu4ZZKIiIiIZDpw4AC8vb3x559/MgwjIiKiDworxIiIiIiompSUFNjZ2aFfv344dOhQfS+HiIiI6K1iIEZERERE1YwePRrXrl1DeHg49PX163s5RERERG8Vt0wSERERURWnT5/G8ePHcfjwYYZhRERE9EFihRgRERERSWVlZcHOzg4dO3bEmTNnIBAI6ntJRERERG+dsL4XQERERETvj4ULF6KwsBCbN29mGEZEREQfLG6ZJCIiIiIAwKVLl7B7925s374dJiYm9b0cIiIion8Nt0wSEREREfLz8+Hg4AArKytcuXKF1WFERET0QWOFGBERERHhq6++QlpaGnx8fBiGERER0QePgRgRERFRA+fr6wtPT0+sW7cOzZo1q+/lEBEREf3ruGWSiIiIqAErLi6Gk5MTdHV1cevWLSgoKNT3koiIiIj+dawQIyIiImrAfvjhB0RHR+PUqVMMw4iIiKjBENb3AoiIiIiofgQFBWHNmjX45ptvYGdnV9/LISIiInpnuGWSiIiIqAEqKytDx44dIZFIEBAQAEVFxfpeEhEREdE7wy2TRERERA3Q6tWr8ejRI9y7d49hGBERETU4rBAjIiIiamDCw8PRpk0bfP755/jll1/qezlERERE7xwDMSIiIqIGpKKiAt26dUNGRgYePHgAVVXV+l4SERER0TvHLZP0n1BUVIR79+7h2bNnEAgEsLGxQfv27aGsrFzfSyMion9IIpGgoKAAIpEIKioq9b2cD56npyfu3r2LmzdvMgwjIiKiBosVYvTey8vLw65du5CdnV3lur6+PqZMmfJaf5mXSCSIjo5GSkoKNDQ00KJFCygpKb3lFRMRUV2Fhobi2rVryMzMBAA0a9YM/fv3h76+fj2v7MP0/PlztGrVCp9++il+//33+l4OERERUb1hIEbvvTNnziAkJETmmLOzM/r161eneXJzc3H48GEkJydLr6moqMDd3R3W1tZvZa1ERB+y0tJShISEIDY2FiKRCLa2trCxsYFAIHij+R49eoRTp05Vu66qqoqZM2dCW1v7ny6ZXiGRSNCnTx9ERUUhNDQUmpqa9b0kIiIionrDLZP0XhOLxQgLC5M7HhoaWqdATCKR4MSJE1XCMAAoLi7G0aNHMW/ePGhpaf3j9RLR64uKikJQUBCys7PRuHFjtG/fHhYWFvW9LPqbvLw87N27FxkZGdJrISEhsLW1xciRIyEUCl9rPolEAh8fH5ljRUVF8PPzw4ABA/7RmqmqXbt24erVq7h48SLDMCIiImrwGIjRW5eeno4nT56gvLwcFhYWMDMze+PqAbFYjPLycrnjJSUldZonOTkZ8fHxMsfKy8sRFBSEnj17vskSiegfuH79Om7cuCH9Pjk5GWFhYejfvz86depUjyujv7t48WKVMKzS48ePERQUhPbt27/WfDk5OdW2wr8qNjb2dZdINUhMTMTChQsxefJk9O3bt76XQ0RERFTvGIjRWyORSHDlyhX4+flVud68eXOMGjUKioqKrz2nSCSCkZERkpKSZI6bmprWaZ709PQaxyt71xDRu5OamlolDHvVpUuXYGtry8rN90RpaSnCw8PljoeEhLx2ICYS1fxXkNrGqe4kEglmzZoFVVVVrFu3rr6XQ0RERPReeL39DUQ1CA0NrRaGAS+3Q129evWN5+3Ro4fM6xKJBF27dq3THDo6OjWO8z+6id69mrZDi8ViPH78+LXnLC0trbGqlN5McXExamo5WlhY+NpzamhooGnTpnLHW7Zs+dpzkmzHjh3Dn3/+iU2bNqFRo0b1vRwiIiKi9wL/+ZXemsDAQLljwcHB6NOnDxQUFF573hYtWmDEiBG4cuUKcnNzAQCKiorYt28fNDU1sXDhwlrnaNq0KZo0aYLU1NRqY0KhEG3atHntdRE1dOnp6bh27RqioqIAAFZWVujZsycMDAzq9HxtW56Li4vrvJbIyEhcv34dSUlJEAgEsLGxQe/evaGnp1fnOUg+DQ0Nq
Kuro6CgQOa4kZHRG807YMAA7N27F6WlpdXm69ix4xvNSVWlp6dj3rx5GDlyJEaMGFHfyyEiIiJ6bzAQo7cmKytL7lhpaSkKCwtrbOKbmpqKzMxMNGrUqNp/ULdq1Qr29vZIS0uDQCCAvr4+cnNzsWTJErRp0wZubm41rk0gEGDUqFE4cOAAcnJypNfFYjGGDx+Oxo0b1/FdEhEApKWlYefOnVVCrYiICDx79gxTpkypU0BiYmJS43hN1UOvevz4MY4dOyb9XiKR4MmTJ4iLi8OMGTNqrRCl2gmFQri4uODy5cvVxgQCAZydnd9oXmNjY8yYMQO+vr7w8fGBrq4uevbsCWdnZygpKcl8JisrC9HR0VBQUICVlRU0NDRe6zXT0tIQFxcHJSUlNG/eHCoqKm+09v+K+fPno7y8HJ6envW9FCIiIqL3CgMxemsaNWqEvLw8mWNKSkpQU1OTOZabm4tTp05VaaBsamoKd3d3aGtrS68JhcIqQdmKFStw//59jBkzBkFBQbX2E9PT08OcOXMQHh6OlJQUxMXFYeHChTU2F05NTcXt27cRFxcHkUgEW1tbdOnS5YP/Dyii2ly7dk1mhVdZWRmuXLmCCRMm1DqHra0t9PT0ZPb4a9q0KZo1a1brHBKJRGZIA7w8qfDWrVsYPHhwrfNQ7Tp37oySkhL4+flJt6UWFBRg3LhxdQ4vZWncuDGGDBmCCRMm4JtvvkH37t1l3ieRSODt7Y2AgADpNaFQCDc3N3Tp0qXW1ykvL4eXlxdCQ0Ol1xQVFTFw4EA4OTm98frfZ3/99RcOHTqEvXv31rlyk4iIiKihYA8xems6dOggdywiIgIJCQnVrovFYhw6dKjaaWLx8fE4ePAgxGKx3DlFIhEOHz4MNTU1uLu712l7laKiIlq3bo2+ffvi008/hYmJCVavXi3z3oSEBOzYsQOPHj1CTk4OMjIycPv2bezevbvOp1sSfYgkEgkiIyPljj9//rxOfbxEIhEmTJhQLfh69uwZ+vfvX6fTabOysmqsTn3+/Hmtc9BLtfVfEwgEcHV1xRdffIFx48ahd+/eWLduHYKDg9/K65eVldV4+Iqfn1+VMAx4+WfIlStX6tRv7vLly1XCsMrX9PLykvnn039dTk4OPDw80L9//zoF1EREREQNDSvESK6SkhI8fPgQL168gIqKChwcHGqsArC3t0dqaipu3bpV5bqBgQH279+PNm3aYN++ffjoo4+kY0+fPkVKSorM+dLS0hAZGVljY2U9PT2cPHkSM2bMwI8//gh9fX3o6uqiY8eOsLW1rfH9CYVCLF68GJ9++ikeP35c7X5vb2+UlZVVey41NRV3796V2+yfqCGoqcF6XcYraWlpYcKECcjMzEROTg4qKirg5OQEQ0NDuWH1q2oLzeoSqjV0z549g4+PDxITEyEQCGBtbY2+ffvK7b+mqqoKa2trWFtbo0+fPti0aRMmT55c7b6ysjLcu3cPYWFhKCkpgampKVxcXNCkSROZ89YUiEkkEvj7+8t9D3fv3q3x/+eXlpYiKChI7vi9e/f+UZXb+2jJkiXIycnB1q1b+X8HRERERDKwQoxkyszMxJYtW3D+/HmEhITA398fO3fuxOXLl+X+h65AIICbmxvmzZuHvn37olevXpg6dSpmzpyJgIAAdOvWDYMHD8bSpUulQVNycnKN60hKSqp1rVlZWfh/7N11WFTZG8Dx79CtgImYICYoAgqK2KhYYGP32r3Wuqurq65da4HdBTa2KGEhgmJgYVGiopQ08/tjlvnJMiB2nc/z8AD3nnvuuQMzcN95z3vatWuHuro68fHxPHr0iN27d3Pu3Ll3HtutWzeMjIxYuHBhju3x8fFERkbmedydO3fe2bcg/KgkEkm+0xlLly6db6aPIgYGBpQvXx5TU1PGjRvH8uXLC5S1U7hw4XwL55uamr7XOH42Dx48YNu2bfLXO6lUyr1791i/fn2+mXfZhg4dypUrV3JlbmVkZ
LBlyxZOnTpFVFQUsbGxXLt2DXd3d548eZKrH6lUSkZGRp6/N2lpaXlOyQcUTrt92+vXr/PNfnv+/Hm+x39vvL29cXNzY968eZQpU+ZrD0cQBEEQBOGbJAJigkIHDhzIUXw+2/nz53nw4EG+xxoYGGBnZ4e9vT3GxsZIJBL09fXZv38/CxYsYOHChTRu3JiIiIg864pl09bWznd/TEwM58+fV7jv3LlzvH79Ot/j1dXVGT16NFu2bMkRAMvMzMz3uIJMBxOEH1mjRo1QUcmdZJyZmVng7LC8jBs3Dh0dHaZPn/7OthKJhObNmyvMgNHR0cHe3v6jxvKjO3XqlMKfV3JyMn5+fu883snJibJly7JixYoc269evcrTp09ztc/IyODIkSO5zpn9mppXQExVVTXf2o16enr5jlNXVzffLKm361V+7968ecOAAQNwcHDgl19++drDEQRBEARB+GaJgJiQS2xsrMJ38LMFBwd/UL8SiYRx48Zx7tw5Hj58SM2aNXn27JnCm2qQZQxUrFgx3z7zy9TKXmnuXQYNGoS6ujrLli2TbytcuHC+K9OVK1funf0Kwo/MyMiI3r1758g+KVWqFDExMfzyyy/s27fvvfq7ceMG7u7uzJo1i40bNzJ+/Hg2b95coNpQpqam9OrVi3LlyiGRSEhLS8PQ0JD+/fu/M1DyM0tOTs43S/fhw4fv7ENZWZkhQ4awc+fOHFlaoaGheR4TExOTK/ssO2s4r4CYkpISlpaWefZpZWWV7zg1NTWpWrVqnvtr1aqV7/Hfk99//53IyEjWrl2LkpL4N08QBEEQBCEv4j8lIZfExMSP2v8u9erVIygoCCsrK9q0aUNaWprCf9oPHDhAt27d8p0m865MrXdleoEsM2Dw4MGsWrWK+Ph44P/TPxXR1NSkbt267+xXEH50xsbG9O3bl0mTJjFp0iQGDBjAihUr6NChA126dOHo0aMF6sfX1xcPDw8iIyPJyMjg1atXpKSk0Lt3b3777bcC9VGuXDl69+7N1KlTWbVqFa9fv843qC3wzmCJsrJygfrp378/AOvXr5dve9dr83/3vysgBrKsREVTdWvWrPnOgBiAra2twqmR9vb273zz5Xtx6dIllixZwowZM36YaxIEQRAEQfhcREBMyMXQ0DDfG6Xo6GjS0tLe2U92EePDhw9z5syZHDciRYsWxcvLi5kzZ/LXX39x6dIlLCwsqFy5Mra2tgwdOpT58+fj7+9PgwYN8qwl9q5MrYJmco0aNYrk5GTc3Nzk28zNzenYsSOGhobybcrKyvTt21fcaAs/jKysLO7cucORI0c4cuQId+/efe8pj+rq6qirqwOy58jWrVtp2bIl7du3x9vbO99jExMTOXv2rMJ9xsbG3L59m4sXLxZ4LEpKSlhZWXHlypUCH/OzUldXz/c10szMrED9FClShC5durBq1Sr5mxBly5bNs72Ojk6O11UoWEBMVVUVV1dXDh48SGZmJvXq1aN///60a9euQEXjJ06cyPbt22natCm2trY4ODgwePBgmjRp8kMUnU9NTaVfv35YWloyZsyYrz0cQRAEQRCEb55E+rHFXoQf0sGDBwkKCsq1XSqVsmrVKrS1tZk5cyaurq4Kg2cvX75ky5YtueqQNWvWLFd2lbe3N66urkgkEnbs2EHDhg3l+65fv46TkxMqKip4eXnlmvIilUrZtm2bwrpmt27dwtnZGVdX1wJdc79+/Th+/DgPHz5ETU0txznevHlD+/btUVdX5+DBgwXqTxC+denp6ezYsSPX1DgTExO6dOlSoML4ycnJ3L17l9TUVIyNjTEyMgIgJSWFtm3bcv78eU6cOJFnVmVwcDAHDhzIs/9Hjx7x8OFDvL29Cxy0mDx5Mlu3blVYw0rIKTo6mg0bNuR6kyMpKYkpU6YUOPh/6dIlbG1tOXz4MK1atSIxMZE1a9YozChu3bp1royuqKgojIyMOHToUI6ViPM6j5+fH/Xq1SvQ2AA8P
T3p0KEDW7ZsoUePHgU+7nsybdo0Zs+eTWBgIBYWFl97OIIgCIIgCN88kSEmKNSyZUuqVauWY5uWlhbdunXD29sbCwsLevTogaWlJV5eXjkySqRSKZ6engqL8p88eZKIiIgc2xo1akRwcDCVK1emSZMmzJ49m6ysLAAsLCy4ePEienp61KtXL9fKkRKJhM6dO2NjYyO/eVdTU+Ply5fs37+fvn37cv369QJd8/jx44mMjGT79u25zqGtrY2DgwPnzp0TBfWFH8bZs2cV1ol68OABPj4+7zw+ODiYxYsXs3//fo4ePYq7uzvbtm0jNTUVDQ0N9u/fj5WVFS1btiQwMFBhH9nP9bxYWVlx7tw5jh07VrCL+veY8PBwnj17VuBjflYlSpRg4MCBWFhYoKuri76+PuXLl8fNzY1JkyYVuJ/atWtjZWUlL66vo6ND3759c0zbS0tLo127dgqnNxYkQwzAy8sLfX196tSpU+CxvXjxgiFDhtCuXTu6d+9e4OO+J9evX2f27NlMmTJFBMMEQRAEQRAKSGSI/WSysrK4d+8eL1++pFChQpiZmeV7A/Ly5UsiIiLQ0NCgQoUKOQrgnz9/nkmTJuHr64uDgwNz587F1taW6Oho1qxZk2eftWrVok2bNrm2Z2ZmMn36dGbNmkWLFi3YsmWLfFpNXFwcHTp0wNfXl02bNtG1a9dcx2dkZPDmzRu0tbVJTU2lbt263L59GyMjI65evYq+vv47H5+2bdsSFhbG9evXc2W+Xbhwgbp163Lp0iVq1679zr4E4VsmlUqZP38+ycnJCvdra2szbty4PLOywsPDWbduncJ91atXp0OHDgAkJCTQtGlT7t+/z7lz56hevXqOtrGxsSxfvjzPcbZr146RI0cSHx/P1atXC1Qk/NGjR5QvX54jR47g5OT0zvZCbuvWrWPAgAGsW7eOfv36FeiYDRs20L9/f+7du4eJiYl8e2pqKr///js7d+7Mc8GWBw8eYGpqyunTp/Os3whgY2ODqakpO3bsKPC1dO3alZMnT3Lz5k1KlChR4OO+FxkZGdja2pKSkkJgYKB8+rIgCIIgCIKQP5Eh9hN58eIFK1asYOfOnZw8eZK9e/eydOnSfKcVGRoaYmFhgZmZWa7VIOvWrcu5c+c4cuQIr1+/xs7ODhcXF27dupXvOPIqkq+srMzMmTM5evQoAQEBWFpacuHCBUBW+N7Ly4uuXbvi6urKvHnzctU5UlFRQU9PD2VlZbS0tDh48CC6urqEh4fTrVs3srKyyMjI4PHjxzx69EiekfC2X3/9lZs3byosBm5tbY2Ojg5nzpzJ9/oE4XuQkZGRZzAMZFPm8sveunTpUp77bt68KX+e6+rqcuzYMcqUKUPTpk25e/dujrYGBgZ5FkQ3NDSkevXqzJ07l2vXrhU4CFK2bFkMDAxEHbGP0L9/fwYOHMjQoUML/Dh27doVfX19Vq1alWO7uro6tra2PH36NFeGcLbsKZv5vUHz7Nkzrly58l5Bzr1797Jr1y7++eefHzIYBrB48WKCgoJYt26dCIYJgiAIgiC8BxEQ+0lkZWWxc+dOYmNjc2xPSkpix44dpKSkfFC/EokEJycngoKC2LJlC8HBwXTq1Omjxtq8eXOCgoIoU6YMDg4OLF68GKlUipqaGhs3buT3339n4sSJDB8+PN9VJMuUKcP+/fsBOHbsGDNmzGDx4sVs3LiRTZs2sWjRolw3evb29tja2jJv3rxc/amqquLg4CACYsIPITuAnBdNTc18VxlUtFpfNqlUysuXL+Xf6+vrc+LECQwMDGjSpAmPHj3K0d7JyYmGDRuiqakJyArjh4aGcurUKZSVlbGzs6Ndu3ZMnTqV1NTUd16bRCLB2tpaBMQ+0vLly6lRowYdOnTgxYsX72yvqalJv379WL9+fa5gq62tLUCeCyQUZMrksWPHkEgkNG/evEDjj4mJYciQIbi4uCjMKv4R3L17lz/++IPRo0e/1zRSQRAEQ
RAEQQTEfhoPHjzIcYP6tuTkZEJCQt7ZR2JiIs+ePVN4Q6qkpESPHj0IDQ1l+vTphIWFKewjIyODkSNHMmrUKF6/fp3nuYyNjfH29mbMmDGMHTuW9u3b8/r1ayQSCTNmzMDd3Z01a9bQvn173rx5k2c/9vb2rFy5kooVKyKRSHK0TUlJ4ciRI9y4cUO+TSKRMGHCBHx8fBTeuDVu3Bg/P78C3ZQLwrdMIpHkewO9b98+BgwYkOfrRn7BNJBlhr2taNGinDp1CnV1dRo3bpwjU0hJSYkGDRowfvx4xo4dy6RJk+jSpQsHDhyQZ4XNnj2bJ0+e5Dsd+23W1tZ51i0TCkZdXZ29e/eSnJxM165dC1Q/cfDgwbx+/ZqdO3fm2G5kZESZMmXkWb//VZCAmJeXFzY2NhQrVuyd45BKpQwdOlS+EMyPsIrkf2VlZTFgwACMjIyYOXPm1x6OIAiCIAjCd0cExH4Sed3UFmR/QkICO3bsYOHChaxevZoFCxZw9OhRhTdH6urqjBgxggULFuTap6mpSbdu3Zg0aRLr16/HzMyM9evX5zktS1VVlWnTprF69WqKFi3KxIkT2bt3LxkZGQwYMIBDhw5x+vRpGjVqRExMTJ7jHzhwYL5Za35+fjm+b9u2LWZmZsyfPz9X28aNG5OcnMzly5fz7E8Qvhd2dnZYW1vn2m5jY0OPHj3Ys2cPlStXZvPmzbmmKFtaWubZb3JyMlpaWrm2GxkZcfr0aTIyMmjSpEmuovdKSkro6uqiqqqKs7MzXbp0YeTIkcTExFC1alX69OnDX3/9lee067dZWVkRGRlJVFTUO9sKeStdujS7du3C29ubqVOnvrO9iYkJLVq0YMWKFbl+Z+zs7D44IJaRkcHx48cLPF1y9+7deHh4sHLlSooXL16gY743q1evxtfXl7Vr1yp8vgmCIAiCIAj5EwGxn0ThwoXz3b99+3a2bNlCUlJSju0ZGRls3rw5R92fjIwMLl++zKFDh/Lsr2jRokybNo2OHTuSlJTE3r17WbZsGefOnWPUqFHcuXMHR0dH+vfvj52dHQEBAbn6ePXqFWvWrCE6OppSpUphZGTEzZs3mTt3LqmpqbRs2RIfHx+ePHmCnZ1drtpEb8vvZuHZs2c5pl4qKyszfvx49u3bl6vPGjVqoK+vL6ZNCj8EiURCq1atGD58OC1btqRly5YMHz4cJycnhg0bRmhoKE2bNqV37940btyY0NBQ+bGVK1dWuLiEiooKmzdvxt7eXmEB9bJly3LmzBni4+Np1qxZrmncb1u+fDkSiYQRI0YAMH36dOLj41m4cOE7ry070CeyxD5eo0aNmDdvHnPnzsXDw+Od7YcNG0ZgYGCuNw7s7OwIDAyU1wt727sCYhcuXCAuLq5AAbFnz54xbNgwOnbsSOfOnd/Z/nv05MkTJk6cyKBBg2jUqNHXHo4gCIIgCMJ3SQTEfhIVK1bMc4qTRCIhKiqKXr16UaJECfr168e5c+fIysrixo0bedaOuX79er43swDVqlVj3rx5HDx4kMaNGzN06FCqVauGn58fmzdvxsfHh5SUFOrUqcPAgQNz1CU6fvy4wkyQjIwMxo4dS0JCArVq1eLChQuoq6tTt25dzp8/r3Ac2bWJFFFTU8u1cl3Pnj0pVqwYixYtyrFdSUmJhg0bioCY8EMxNDSkdu3a1K5dW76yK0DJkiXZsWMHx48f5+nTp1hYWPD777+TnJyMRCKhZcuW9OvXj9q1a1OjRg2cnJz49ddf8fLy4uXLl1hbW+Pj45PrfKamppw6dYqoqCiaN29OXFycwnEVLVqUZcuWsXv3bvbt20fp0qUZMWIECxcuzDcrFGSZTUWKFBF1xD6RsWPH0rlzZ/r06cPt27fzbduiRQvKly/PihUrcmy3tbUlNTWV4ODgXMe8KyDm5eVF0aJF81yAIZtUKmXIkCEoKSmxcuXKfNt+r6RSKb/88guFChVSWO9SEARBEARBKBgREPtJKCsr0
6VLF7S1tXNsV1NTw9XVlZMnT/LgwQPGjx/P2bNnadiwISYmJvKi9HnJb4XKt5UvX57NmzcTHByMmZkZXbp0oXbt2vJl4pcvX87evXsxMzNjxYoVvHnzJt+MLyUlJaytrbl+/TrlypXD39+fatWq0aRJEzw9PXO1Nzc3z7MvCwuLXPVlNDQ0GDlyJBs3bsw1ratx48ZcuHAh39plgvAjcXR0JCQkhIkTJzJ37lzMzc05ceIEIAs8tWzZEmdnZ2xsbFBTU6NmzZoEBATIn5MrV67MNX2uatWqnDhxgvv379OqVatc2anZunbtStu2bRk6dCixsbFMnjwZZWVl/vrrr3zHnF1YX2SIfRoSiYR169ZRpkwZXFxciI+Pz7OtsrIygwcPZteuXTneULG0tERdXV3htMnsgJiamprCPr28vGjZsmWuNy/+a+fOnezbt49Vq1ZRtGjRglzad2fLli0cO3aMNWvWUKhQoa89HEEQBEEQhO+WCIj9RIyMjBg5ciRt27albt26tGzZklGjRlGxYkUAKlSowLRp07h//z7nzp2jcePG77yZvH79er4rPf6XhYUFhw8fxsfHBzU1NRwdHWnRogV16tTh7t27dOrUiREjRtCwYcNcN9BvK1OmDJqamtSpU4f169fLV7FzdnamY8eOLF26NEd7BwcHjIyMcvXz/PlzqlSpovAcQ4YMQUVFheXLl+fY3rhxY9LT0/H39y/wdQvC905TU5OZM2dy/fp1jI2Nad68Oa6urkRHRytsX7RoUU6cOMHQoUMZNmwYgwYNyrUYhaWlJUePHuXatWu0a9dO4Wq3EomEVatWkZyczNixYzEwMGDixImsXr06z8U7sllZWXHlypV8X0uEgtPR0WHfvn1ERUXRp0+ffB/Xfv36yYNo2dTU1LCysso3IKYoQyw8PJzr16+/c7pkVFQUw4YNo0uXLnTo0KGgl/VdefbsGaNHj6Zbt260atXqaw9HEARBEAThuyYCYj+YpKQkzp07x86dO9m/fz9hYWE5blrU1NSwtLSkWbNm1K5dW2FtLSUlJRwcHFi3bl2uKYNvS09PZ8CAAZQrV47p06crrBeUl/r16+Pv78/+/fuJiorCxsaGYcOGMX78eC5duoREIuHVq1d5Hq+rq8uFCxfo2bMn/fv3p0+fPmRkZLBt2zZ+/fVXRo8ezZgxY+QF+9XV1enbty+tW7emUqVKSKVSjhw5gpubG66urgoXCNDX12fQoEGsWLGCxMRE+fYqVapQvHhxMW1S+ClVrlwZb29vNm3axKlTp6hcuTIrV65UGBhXVVVl6dKlrF+/ns2bN9OoUaNcRe5tbW05fPgw58+fp2PHjgrrSxkZGbFo0SI2bdrEsWPHGDVqFEWKFOGPP/7Id6zW1tZER0cTGRn5cRctyJmZmbFlyxb27dvH3Llz82xXpEgRunbtyqpVq3L8btjZ2SlcwTe/gNjRo0dRUlLC0dExz/NJpVIGDx6Mqqoq//zzz/tc0ndl+PDhqKio5HrTRxAEQRAEQXh/IiD2A4mOjmbFihWcPXuWO3fucO3aNbZs2cLRo0c/OEOiYsWKCgtnZ2Vl4evrS/fu3albty4LFy6kfPnytG7dmgMHDigMMEmlUu7evYuXlxdHjx7lwYMHtG3bluvXr7N+/XouXrxI1apVWbt2LR4eHpiYmCgcU1ZWFn/99Rd//vkny5YtY/PmzezZs4c6depw584d5s6dyz///MOyZcvo3LkzycnJgKzYt5WVFV27duWPP/6gaNGi8gUCJk6cqPBco0ePJjExkbVr18q3SSQSGjVqhLe394c8pILw3ZNIJPTq1YvQ0FA6derEsGHDqFu3LkFBQQrb9+3bFx8fHx4/foy1tTWXLl3Ksb9Bgwbs27ePkydP0r17d4WvH3379qVZs2YMGjSIjIwMpk2bxvbt27l27Vqe4xSF9T+Ptm3bMnXqVH777TdOnjyZZ7thw4bx+PFjvLy85NtsbW15/PhxrsBof
gExLy8v6tati76+fp7n2rZtGwcPHmT16tUUKVLkfS/pu+Dp6cnevXtZvnz5D3uNgiAIgiAIX5IIiP0gpFIp+/fvlwd/3hYQEMCDBw8+uO8WLVrQqVMnKlasSIkSJTA3N6du3bpUqVIFT09Pdu/eTZUqVXB1dSUiIgJnZ2fKli3L77//zqNHjwDZzc7WrVvZsWMHAQEBXL58mW3btrFz505AdrN79+5d5s2bJ68lFhkZia2trTzLC2S1vbp3786wYcNYtGgRVlZWVK5cmYCAALKysrCxsWH79u0MGzaMffv24eXlRdOmTXn58mWOa1JSUmLXrl3yoNuiRYs4ePBgrmsvU6YMrq6uLFq0SH7DBrJpkwEBAXkWAxeEn4GhoSHu7u74+vqSlJSEtbW1fMGL/6pTpw5XrlyhbNmyODg4sGHDhhz7mzdvLi+e369fvxzPe5AF4dzc3IiNjWXSpEn069cPU1NTJk+enOf4SpUqRbFixURh/c9g+vTpNGvWDFdXV/nr/H/Z2NhgY2OTo7i9nZ0dQK5pk3kFxFJTUzl16lS+0yUjIyMZMWIE3bp1w8XF5UMu55sXGxvL0KFDadu27Q+7cqYgCIIgCMKXJgJiP4iYmJhcxd/fll8WxbtIJBKqVq1Kt27d+OWXX2jfvj3Nmzdn9erVREdHs2vXLooUKcKOHTu4ffs2zZo1o2bNmixZsoQKFSrQsmVL3N3dFdb7uXv3rrwWl4aGBmPHjiUsLIyxY8eybNkyXF1d0dbWpl69ely+fJnJkyczadIkXF1duXr1KpqamtjZ2bFt2zb8/Pxo37493bt3Z/DgwTg6OuLt7c29e/eoW7durvPr6Ohw4sQJdHR0AOjSpQsPHz7MNcZff/2Vp0+fsnv3bvm2xo0by7PkBOFnZ29vz9WrV5k9ezarV6+matWq7Nu3L1dmasmSJfH29qZ3797069ePkSNH5gg0t2vXjm3btrFt2zaGDBmS6/hy5crx999/s2rVKvz9/Zk1axZHjx7l3LlzCsclCut/PsrKymzfvh09PT06dOig8M0YgKFDh3Ls2DHu378PyIKUpUuXLnBAzM/Pj8TExDwDYtkrLmpoaLBs2bKPvaxv1rhx40hJSWHVqlW5FoERBEEQBEEQPowIiP0g3rXi4c2bNzl//rzC+jwfQ0NDg86dO+Pl5UV4eDh//fUXkZGReHl5oa2tjaOjIxEREQoDTdn+O82qUKFC/PXXX9y/f5+uXbvy22+/0adPH5ydndmyZQuXLl2iSpUqeHh4cObMGaZPn878+fNp0KABI0eOxN3dnU2bNmFnZ4ehoSEXLlxAKpViZ2dHQEBAjnOVL1+egwcPIpFISElJwcnJKVfhb3Nzc1q0aMG8efPkN+gVKlSgTJkyoo6YIPxLTU2NiRMncvPmTSwsLGjfvj3t2rXj8ePHOdqpq6uzZs0aVq5cyapVq3B0dOT58+fy/V26dGHdunW4ubkxduzYXEGxoUOHYm9vz4ABA3BycsLa2pqJEyfmOS1cFNb/fAwMDPD09OTWrVsMHTpU4WPcpUsXDAwMWLVqlXybojpi2QExFRWVHNu9vLwwMjLCwsJC4Rg2b97M4cOHWbNmDYaGhh97Sd+k48ePs3HjRhYuXKhwcRhBEARBEAThw4iA2A+iWLFi+S5Hf/HiRerVq0fhwoVp1qwZs2bNwt/f/70DZFKplNjYWKKionJkdoAs+2P8+PGEhIRw5coVOnfuzJUrV7h586Y8C0uR+Ph4hdtLlizJypUruX37NvXr12fQoEH89ddfzJs3j1GjRjFr1ixq1KiBubk5AQEBqKioYGtry5MnT+RTuKysrAgODub8+fOYmJjQsGFDDh06lOM8jRo1YvHixQCEhoYyePDgXGOZMGEC169f58SJE4As86Rx48YiICYI/1G+fHkOHz7M3r17CQwMpGrVqsyfPz/H64VEImHIkCGcPn2amzdvYmNjQ3BwsHx/nz59WLFiBUuWLOH333/P0
b+SkhLr1q0jIiKCP/74g7///ptLly6xf/9+heOxtrYmJiaGiIiIz3G5P72aNWvi5ubGxo0bWbNmTa79mpqa9O/fn/Xr18vfuLG1teXKlSs5/v6kp6ejoqKSK/vJy8sLJycnhVlRERERjBo1ip49e9K2bdtPfGXfhoSEBAYNGkSTJk3o16/f1x6OIAiCIAjCD0UExH4Q2traWFpaKtynpqbGli1bCAgIYMaMGairqzNv3jzs7e3fK0AWFRXF2rVrWb58OW5ubixcuBAfH59cWQESiQQrKyuWLVtGZGQke/fuJSUlJd9+hwwZkmcWmampKTt27CAwMJAyZcrQo0cPzp49y7p166hcuTLOzs5MnDiRLVu28PvvvzNnzhz5DVjz5s3p2LEjf/31F0ePHqVFixY4OzvnyFYAGDlyJN26dQNg48aNbNu2Lcf+hg0bYm1tzbx58+TbGjduzLVr13jx4kWe1yYIPyOJREKHDh24ffs2AwcOZNKkSVhZWXH+/Pkc7RwcHAgMDMTQ0JC6devKawqCLBNs/vz5zJo1i9mzZ+c4zszMjBkzZrB48WK0tbVp1qwZkydPVliM38rKCkDUEfuMevbsyfDhwxk5cmSuqZAAgwcPJi4ujh07dgCyDLGUlJQcU/nT09NzTZcMCwsjNDRU4XRJqVTKwIED0dLS+qFXXJwyZQovXrzA3d1dTJUUBEEQBEH4xERA7AfSokULLC0tc/zTrK+vT48ePTA0NMTa2prx48ezatUq3N3dWbBgAZMnT6ZkyZLvDJDFxcWxefNmIiMj5X2npqbi7e2Nj49PnmNSU1PDxcWF9u3b59nm0qVLrF69mgoVKmBpaYmHh4fCG9tatWpx/PhxTp8+TVZWFr169QJg8eLF3L17l1q1apGcnMzZs2eRSqU0atSIqlWrsnTpUlauXImjoyPz589nxIgRDB06lEmTJskLd0skEjZs2IC5uTkAvXv3JjQ0VH5uiUTChAkTOHPmjPzGulGjRgB51i8ShJ+dnp4eS5Ys4fLly6irq1OvXj1++eUXYmNj5W1Kly4tr//n6urKxIkTyczMBGD8+PH8+eef/PbbbyxZsiRH32PGjMHa2pp+/foxY8YM7ty5w8aNG3ONwcjIiBIlSoiA2Ge2cOFCbGxs6NixY656ltm1JFesWIFUKsXS0hI1NbUcwTNFATEvLy9UVVVp0qRJrvNt2LCBo0eP4u7unu/qk98zPz8//vnnH2bPnk358uW/9nAEQRAEQRB+OBKpKKzyw0lISCA6OhoNDQ2MjY1zBMiuX7/Ovn37ch1jaWlJqVKlOHv2LGfPnsXHx4f4+Hg0NTWpV68e9evXz7MGj5qaGuPGjUNNTS3PMUmlUs6cOYO/v7+8H6lUSkBAAF5eXpQqVYrU1FR5tpWuri59+vRh4sSJlCpVSmF/np6eTJkyhbt379K5c2eMjIxYvXo1BgYG/P3339y9e5c5c+ZgYWHB+PHj+e233+SBvXv37jFu3Di6du3Khg0bUFdXB2SLE5iZmREXF0epUqW4c+cO2traAGRmZmJmZoa1tTW7du0CZJkqzZo1Y8WKFQX50QjCTyszM5NVq1YxZcoUNDQ0WLRoEd27d5e/PkmlUhYtWsSECRNwdHRk+/bt6OvrI5VKmTRpEvPmzWPNmjUMGjRI3ueNGzeoVasWEyZM4MGDB/j6+nLv3j00NTVznLt169ZkZmZy9OjRL3rNP5vIyEisrKwwMzPj1KlTOQJcXl5etGrVigsXLmBra4udnR3ly5dn+/btAMyZM4eFCxfmyLjNrul4+vTpHOd5+vQp1atXp3379rlWK/1RJCcnU7NmTQwNDfH19UVZWflrD0kQBEEQBOGHIzLEfkC6urpUrFiR0qVL5wiGpaWl4eXlpfCYoKAgihQpwrhx4zh06BCxsbFcuXKFmTNnoqGhQXh4eJ7nS0tLy1EUWxGJREKTJk0YPXo0zs7OuLi4MH78eA4cOMDhw4exs7MjPj4eZ
WVljIyMSE5OZvny5RgbG2NjY8OhQ4fk2VzZ/XXo0IGbN2/i5uYmfye9U6dO1KxZk169euHj48PWrVtJS0ujT58+dO/eHXt7e9q0aUN0dDQ7duzA09OT5s2b8+rVK0BWi+3MmTMoKSkRERFB165d5QE8ZWVlxo8fz969e+UrVoo6YoJQMMrKygwfPpzQ0FAaNWpEz549adq0KXfv3gVkz+lx48Zx7NgxLl26RO3atbl16xYSiYS///6b4cOHM3jwYLZu3Srvs3r16kydOpW///6brl278uzZM5YvX57r3NbW1qKw/hdgZGTEnj17OH/+PBMnTsyxr0WLFlSoUEH+5oGdnV2+GWJv3rzB29s713RJqVTKgAED0NXVldd+/BH9+eefPHr0iHXr1olgmCAIgiAIwmciAmI/kbCwsFwrKL7t1q1b8q+VlZWxsrKSB8gcHBzy7btRo0Z06NCBnTt35ltTS09Pjxo1amBhYYGOjg4qKiq0atWKPXv2EBUVJQ+CZWRkoKWlRaFChbhy5Qpt27ZFX1+fsWPH5piOo6KiwsCBA7l37x5//fUXhw4d4ty5c3Tv3p2oqCi6d+9OgwYNGD58OHPmzCEiIoLx48ezcOFCVqxYwc6dOwkJCcHe3l6+Gl6tWrVYv349AIcPH85xg92nTx8MDAxYtGgRIAuIhYaG5phKKghC3oyMjNi1axdHjx7l4cOHmJubM336dHmdwWbNmhEQEICGhgZ16tThwIEDSCQSli5dSt++fenduzd79+6V9zdp0iSqVq3K9OnTGTBgAHPmzJEHuLNZW1vz4sULnj59+kWv9Wdkb2/PokWLWLx4sbxmGMgWQxgyZAi7d+8mJiYGOzs7Hj16RHR0NJA7IHb27Fn5yr9vW7t2LSdOnMDd3Z3ChQt/kWv60gIDA1mwYAF//PEHVapU+drDEQRBEARB+GGJgNhP5F0rSnp4eDB48GDc3NwIDAzMETyrXr16nsfFxcXx6NEjPD09cXV1pWjRopQoUYLOnTvj4eHB69evCzQ+AwMDhgwZwqVLl7h16xYjR46UT1fU09MjKSmJxYsXU7JkSWxtbTl+/Lg840NLS4uJEycSFhbGsGHD8PDwIDY2llatWrFp0ya2bt3K5MmTSU5OZtmyZQwYMICwsDAGDRrE/PnzSU5Oxs7OjqCgIEBWQ2zIkCEAjB49mqtXrwKyFdNGjBjB+vXref78OQ0bNgTA29u7QNcoCIJMixYtuHHjBuPHj2f27NlYWFhw6tQpAExMTLhw4QKOjo44Ozvz559/AuDm5kaXLl1wdXXlyJEjgGzK9oYNGwgJCaFQoUKkpaUxd+7cHOcShfW/rOHDh9OjRw8GDBhASEiIfHvfvn3lq4Ta2dkByLPE/hsQ8/Lyoly5clSuXFm+7fHjx4wbN47+/fvTsmXLL3Q1X1Z6ejr9+/fH3NycCRMmfO3hCIIgCIIg/NBEQOwnUqZMmXz3q6ur4+/vz5AhQ7C2tkZHRwdLS0v69++Pn58fRYoUyXVMdv2w5ORkAgMDmTp1KjVr1iQ2NpY9e/bQsWNH9PX1KVWqFN26dePgwYMkJCQAsqkvb968URioq1KlCnPmzOHJkyccP36c1q1by2+WNDU1uXTpEi1atMDQ0JCJEyfKi3Tr6+szd+5c7t27R/v27fHy8qJw4cKYmpoya9YstLW16dKlC+7u7hQrVoxKlSoxYMAAXFxcMDIywsHBgePHjwPwzz//YG1tLS/QHxcXB8CwYcOQSCSsWLGCYsWKUb16dREQE4QPoKWlxaxZswgODqZEiRI0a9aMHj168OzZM3R0dNizZw8zZ85k+vTpdOjQgTdv3rBp0yZat25Nhw4d5LWlrKys+PXXX1m8eDE9e/Zk6dKlREREyM9TsmRJjIyMCAwM/FqX+lORSCSsWbOGihUr4uLiIn9TxNDQkK5du7J69WpKlixJqVKluHjxIpAzICaVSjly5AhOTk45aswNGDCAQoUKsXDhwq9yX
V/C3LlzuXHjBuvWrcu1yIAgCIIgCILwaYmi+t+xpKQkzp07x+3bt8nIyKBcuXI4ODhQsmTJPI85ePCgPAvqbUWLFmXQoEGoqKjw5s0bQkJCuHr1qvwjJCSEzMxMatasSe3atSlcuDB6enrUqlULOzs79PT0cvSXmZlJcHAwnp6eHDlyhFu3bpGeng7IbpYaNGhA3bp1UVNTQyKRYGZmRvPmzfNdLSwuLo49e/awadMm/Pz8UFFRISsri6ysLCQSCXXr1mXmzJk0bNhQfhMVGhrK1KlT8fDwoHz58kilUh49ekTbtm25desWT548oVGjRpw8eRIHBwdUVVU5c+YMa9asoX///sTHx1O+fHliY2OxsbHh0qVLSCQSRo4cyfbt23n8+DFTpkzh0KFD8rpigiC8P6lUysaNG/n111/JzMzk77//ZuDAgSgpKXHo0CG6d+9O6dKlOXDgAKVLl8bZ2RkfHx+OHz+Ovb09KSkp1KxZEx0dHR49ekT79u1xc3OT99+2bVtSU1PlAW/h8wsLC8PKygp7e3sOHDiAkpISV65cwcbGhgMHDrBlyxaePXuGj48PI0eOxNvbm5CQEEJDQ6lSpQqHDx+mVatWAKxZs4bBgwdz/PhxHB0dv/KVfR63bt3C0tKSsWPHMmfOnK89HEEQBEEQhB+eCIh9p968ecPatWtz1cpRVlamV69eeWaDZWZm4u3tTUBAAGlpaUgkEipXroyTkxM6Ojp5ni81NZWbN2/mCJJdu3ZNXvenYsWKWFlZUatWLWrVqoWlpSUGBgY5zhscHMyePXu4deuWfArT2zIyMrC1taVp06byVR/zcv/+fTZv3szmzZt5/PgxKioqZGRkALIshF9++YWJEyfKA3WXL19m0qRJeHt7U6FCBWJiYuR10ry9valSpQoxMTGoqKjI6xb98ccfTJ8+nbt371KtWjUyMzOZNGkSc+bM4dGjR5iamrJkyRL5zXlYWBjly5fPd9yCIOTvxYsXTJgwgQ0bNmBra8uaNWuwsLAgNDSUdu3aERMTw44dO3BwcKBVq1YEBgZy+vRpbGxs8Pf3p379+rRt25bDhw9z8+ZNKlWqBMCMGTNYtmwZz58/z7HYiPB5HT16lFatWjF9+nT++OMPAOrUqUPhwoVp3rw5U6dOJS4ujpEjR3Lp0iWuXr3KokWLmDJlCrGxsWhpaclrzXXv3p01a9Z85Sv6PDIzM7G3t+fVq1cEBwejoaHxtYckCIIgCILwwxMBse/UqVOn8Pf3V7jPyMiIgQMH5nt8eno6cXFxaGlpoaWl9UFjyMjIIDQ0lMDAQHmQLCgoiKSkJADKlSsnD5BlB8sMDQ1ZvHixvM1/+fj4cPbsWcqWLUuTJk3o3r079erVy3PqSFZWFufOnWPTpk3s3r2b5ORkJBIJUqkUJSUl7O3tmTVrFvb29kilUk6ePMmkSZMICgqiTJkyPHnyBBMTE1JTU3n+/DnGxsY8fPiQZs2acfz4cXr37o2bmxtHjhyhffv2gOyxb9KkCd26dePixYtcvnyZ4sWL4+7uTr9+/T7osRQEIScfHx8GDx7M3bt3GTNmDNOnTyc9PZ3u3btz7Ngx5syZw+DBg2nevDl37tzh7NmzWFhYMGrUKNzc3DAwMMDOzk5egP/IkSO0bt2ahw8fUq5cua97cT+ZmTNnMm3aNA4dOkSrVq3YvHkzvXv3ZufOnXTt2pUrV66watUqQkJCuHTpEk2bNkVVVZWjR4+SlZVF06ZNefDgASEhIbmykX8US5YsYezYsfj6+lKvXr2vPRxBEARBEISfggiIfadWrlzJ8+fP89zfunVrTE1N0dPT+6LZEJmZmdy/f5+rV6/mCJRl198yNzenQ4cOeR6fnp7O/v37uXPnjjzjS1lZmfLly9OkSRN69uxJnTp1UFFRyXVsYmIinp6ebNiwgbNnz8oDYyCbEjpkyBAmTJiApqYmu3fvZurUqYSFhWFoaMiLFy+oVKkSd+7cwdjYmIiIC
CwsLLh58yYNGzbEw8ODGTNmsHDhQlRVVYmIiCA8PJxatWqxc+dOFixYQKVKldi6detneFQF4eeUlpbGggULmDlzJsWKFWP58uW0atWKP/74g9mzZ+Pq6sqCBQto3bo1ERERnDt3jtKlS2Nubo6amhp37tzh0qVL1K5dm+joaEqWLCmvbSh8OVlZWTg7O+Pr68uVK1coVaoUxsbGdOvWjdWrV7No0SICAgJ48OABR48exdDQkIULFzJixAhWrlzJsGHDOHnyJE2bNv3al/JZhIWFYW5uTv/+/Vm2bNkn6TP776eiv5WCIAiCIAiCjAiIfaf++ecfXr58mef+hQsXkpCQgI6ODqVLl8bY2Fj+kf19sWLFePXqFc+ePUNFRYXKlStjbm6OktKnXWtBKpXy8OFDeQaZmppanm0fPXrEpUuXqFu3LsWLF+fRo0dcvXqV+/fv5wiQmZiY0KxZM3r16oW1tXWuMT969IgtW7awdu1anjx5It+upKREgwYNmDNnDpaWlqxdu5YZM2bw8uVLVFVVkUqlaGtrk5CQgKqqKtra2iQlJWFiYsKRI0fo1KkTFy9epEyZMjx8+JDmzZsTGxtLkyZN2Lp1KxEREWI6liB8Ytmrxx47dox27dqxfPlyLl68SJ8+fTAzM2P9+vX07NmTV69e4evry8OHD2natClGRkZUqlSJ06dPI5FIMDY2pmfPnqI+01cQFxeHjY0NGhoaXLhwgRkzZuDm5oapqSlmZmZIpVKioqIYNWoULi4u3Lt3DyUlJczNzenduzcrV6782pfwWUilUnkG3I0bN/ItXVAQ4eHhnD59mkePHgHI30wqVarUJxitIAiCIAjCj0UExL5TXl5eBAQEKNxXuHBhrKysCA8Pz/Hx9OlTwsPDiYqKQldXl759++YqYp+cnIyqqmqu4Jm+vv4nCfRIpVJWrFiRZzDv2LFjXLx4EWVlZbKyspBKpaipqVG9enXMzMx49eoV9+7d4/Hjx2RmZgKyd8ArVqyIo6MjvXv3pmbNmjlWJjt//jwbNmxg27Zt8ppnAMWKFWP48OEMGTKENWvWMHfuXFJTU0lPT0dPT4+4uDj09PRISkpCV1cXbW1t9u/fLw+CdenShf79++Po6Mjs2bOZMmUKoaGh8ppFgiB8OlKplL179zJq1Cji4+OZMWMGDRo0oGPHjiQmJrJ69WomT55Meno6Pj4+zJw5ky1btpCSksKxY8do3rw5zs7OJCUlcfLkya99OT+lGzduUKdOHZydnZk5cyampqY0adKEBw8eYGVlRVxcHOXKlZMvFtO4cWMeP35MSEgIOgDz58OlS3D5Mrx6BRs2QJ8+uU90+zaMGQN+fqCmBq1awaJFULRo3oPbtg169ABtbUhMzLnP3R22boXQUHj9GoyMoGFDmDYN3p5++/QprF8PR47AvXugrAzVq8PUqZBHdpvX2LFoLF5MfS0tVJWUwMwMJkyALl3+3yglBRYvhi1b4NEj0NeHunVh+nSoVk3eLDw8nHNTp1L/zBlKRkWRoaLCw/LlOePkhMuYMSIoJgiCIAiC8B8iIPadio+Px83NLVctLolEQteuXTEzM8vz2PT0dLZt28bjx48V7r906RLHjx8nKytLvk1TUzNHgOy/ATNjY2MMDQ0LFDQLCwtj27ZtOfoHWe2zFi1ayFe4vHz5MpcvX+bZs2fya8v+ddXR0cHExAQVFRWio6OJjo6WB8hUVVUxMzOjRYsW9O7dm+rVqyORSEhOTmb//v2sXLkSPz8/+XmVlJRo1KgRkydPxsvLi+XLl8sfJw0NDTIyMsjIyKBQoUJkZWXh5uZGjx49yMzMZOPGjSxZsgQDAwN8fHxYtmwZQ4YMeedjIAjCh4mLi2Pq1KmsWLECCwsL5s2bx9y5czl37hzTpk1j7dq1qKurc/jwYRo1asSbN28oW7YsV69eZfbs2SxatIiXL1/+nJmciYkFCypdvgwbN8raXb8OGRmQ178Kq1bBmTOytk+fQ
u/esmMVCQwkcuBAlIKCMFBTI0Jdnc2qqsyIjcWxRQskEglPgoKYbWqKVVQUag8eUFhbG9Xq1aF7dxg5EsqUgQoV4OzZ/4/96lVZcMjPD5KTIT1dFjT6/XfZNS9YAKVLQ9u2SLduRRoRwRt9fR41akT6uHFYmJmhXLUqxMVBZqYsAKXIiBFQsyY8fCgLkmVmwrVrsgAZwD//yIJZzs5Qr57scdu8WTa+9euhb98c3b1atIhC48Zxy8iI6lOmyAJod+5AqVIwfvz/G3boAAcPwsCBUKsWREbCihWyaw0JgbJlATg9ZgyNli4lqmRJrltYoJ6aSp1Ll8hUVubo7Nl0GT48r98MQRAEQRCEn5IIiH3HYmNjOXXqFHfu3CErK4tSpUrRqFEjTExM8j0uNTWVuXPnktePXl9fnx49epCUlCSvlfV2hln2R/b0wOrVq1OuXDmkUikvX75ERUUlz+BZkSJFkEgkREZG4ufnR3h4OGpqalSrVo26desqXF3y5cuX8lpkFy5c4PLly0RFReVqV6hQIXR1dUlOTubVq1fygJuamhqVKlWiZcuW9O3bl8qVKxMREcHmzZtZtmwZ0dHR8j6KFi1K3759iYyMZNu2bSgpKckz1ZSVlVFVVSUjI4NRo0axcOFCJBIJixYtYsyYMdSsWRNTU1P27NnzPj9GQRA+wJUrV/jll18ICgpi0KBBKCkpsWrVKjp27Ii/vz+GhoZMnjyZ7t27A7Bt2zb09fVxcnLiwYMHVKhQ4StfwVfw6BGUL684qPS26dNh9mywsICEBLh7N++AWLlysja1a8OpU7LAlaKAWGCgLKupYkUOFCnCcV9fxlSpQsWbN1mhrMy+hg3R1NREycsLT4kEL6mU1Lp16di1K3h4gLc3jB0LCxfClStgYyMbu5ERtGkDlpayrKoDB8DfH/r3h9WrZec+dQqaNUMK3LazI8zAAOPwcGpeu0ZgrVqoFCuGeVgYSjY2snOlpMiCbzY2Oa+hRQsoUuT/12NtzdNhw7jarBnKyspUA8rWro1SsWL/PyY1VRZES0yUBQz/JX34kDRTU7ZoaNAhPDxXtrZcRAQYG8sCZPPn/3+7tzc0bizLfBszhoyMDGKNjFDOzGTl0KFk/Vs7rHh0NIPWrOGSrS21fX1RVlZWfB5BEARBEISfkAiI/QAyMzPJysrKcyXG/4qPj2fx4sV57n/9+jVLlixBTU2NIkWKULRoUfnnt78uVKgQT548ITU1NcfxcXFxBAQE8PTpUyIiIuS1vwDU1dXlBZXzyjYrWrToO+uYxcXFERwczJUrV+SFmiMiInK1y15BMzk5WR4AVFdXp0qVKjg5OdGnTx9evXrFsmXL2L17N+np6YAsa8za2ho1NTV5Nll2hlr2Z3t7e/z8/NDV1UVfXx89PT2ioqKIiYn55HXYBOFb8erVKxISEjA0NERbW/urjiUjI4MVK1YwdepUtLW1cXFxYcOGDVSqVInw8HDKlSuHqakpnp6elCpVCj8/P0qXLs3u3bvp1KnTVx37V5GaKssKK1EiZ1DpvwGxZ89ATw80NWH4cFk2Ul7/Kjx+LAuwSSSgowMdOyoOiA0aBJs2QVQUGXp6NGvWjPDwcA7Fx1MuLo65kycDoBEVRcyLFxwOCSEwMFBWU0sqlU059PeHly9lUyJtbGDlSvjzT1mgbe9eUFKC4sWhQQPYvfv/5w4IgNq1SSxalIXDhsk3Nzt+HLsLF8hSUiJo2jSsw8Jg1y5ZQGzPHtm15OFpcDClLS3xq1eP082aybeXKlUKV1fXnM+NceNkgav4eNDVBeBWu3aYHDzIiS1baNOjhyxgpq0texzfFhoKVarIgmFvZ41lb1+1CgYPJiMmBpXixfGvW5dTjo45uhiyYgUaKSlov3olAmKCIAiCIAhvEcsP/QCUlZXf659cXV1ddHV1SUhIULjfxMSEnTt38uLFC54/f87z58958eIFz54948aNG/LvXVxcqF69eq7jCxUqhJGRE
UWKFKFZs2ZoaWnJa4KlpaWRlJREXFwcDx8+5Pz584SHh8sDUSCb8liqVKkcAbP/Bs+KFy9OgwYNaNCgAePGjQNkq0xev36dgIAAvL29CQwMJCIiIlcmXGZmJsHBwQQHBzN79mw0NTWpUqUK48ePx8jIiDVr1nDjxg0uX74MgJ6eHrq6uvKAW3Z/fn5+FClShBcvXlC4cGFu3bpFVlYWISEh1KhRo8A/D0H4Hrx69YoDBw7Ip1pLJBLMzc1xcnJSmNn5JaioqDBq1Cg6duzIqFGjWL16NbVr1+bx48coKSlx7949lJWV0dXV5fHjx+zbt4/SpUtz5cqVnzMgpq4uC4a9S/HiBe/z3+l67xQfLzv/kiWoXL7MqatXUY6PJ9zIiIy3VkJMKVkSvZIlmVShAprt28P587I6YCYmsoBeWNj/+zxzRha8CwqCQoVkwaSEBNlnqfT/wSVfXwB0nj9n2vTpCod3Sk0NK0AejlqxAoYMkQWqqlSByZNlGVmZmSTdvk3Kv1Pj7f39sff3z9FX5tChkJb2/w3R0aClBTt3wqJFSB8+xCQ1lSQNDdooK8sywCIiZNM8mzSRjfvKFdlxxsayQOOCBVCpkiwTLjJSNjWzfHno2hUAlX9LBmQoeGMsXVWVYs+fw/PnBfv5C4IgCIIg/CREQOwnJJFIqFevHseOHcu1T0lJCWdnZ4yNjfPtIyUlhXnz5uU57bJMmTJcvXqVp0+fygNrb968ydVOW1sbIyMj9PX10dLSQl1dHYlEQmZmJikpKVy/fh0fHx9iYmJyBM1UVFTkmWb/DZjZ2trSqVMnihcvTnp6OiEhIVy8eJFTp04RFBSUK5MsOTlZPiUTZPXSatSoga6uLsHBwcTHxxMfH49EIkFNTS1HRtyLFy9QVlbm6dOnqKmpkZmZyZkzZ0RATPihpKamsmnTJuLi4uTbpFIp169fJzk5mW7dun3F0cmycvbu3cuRI0cYPnw4r169okSJEsTFxXH9+nVMTU159eoVv//+Ow4ODly5cuWrjven1LChLPtq5kykJUsSY2BAyfh4jKKiONG8eY6munFxdF6zhhQ9PbRnz5YFpWbOlO3U05MFdgBu3ZLV3bKzg9OnISZGtn33blmgbt482fdvvWbvb9uWrH/fQKp68yaV794lSVOT1NRUsrKykL+1dPas7LNEIgvCde4MqqqQno42UEpTk3umplS8f5/DrVqR9tbqyVIlJewiIzEyMoL798HTE2rUkGXJdejAWl1degcEYJCSAr16yYJtNWrI2m3fLiv+P3iwLPMuJESWCZeaCm3b/v9BsrKSBQsLF5Z9X7w4adraGP+nNqjmmzcUffFC9k1EhAiICYIgCIIgvEUExH4gmZmZhIWFkZSURPHixSlZsmSebWvXrk16ejq+vr6k/ftOtp6eHk5OTu8MhoEsIJbfbNu0tDTOnDkDyGp46enpUaxYMXnQS01NTZ7VlpWVRUZGBomJiTx//pykpCQSExOJj4/P1a+Kigq6urpoamoCslW1Hj16xJs3b0hISMgxPVNJSQkjIyPKlCmDsbExpqamNGzYkJIlS5KWlsajR4+4fPky169fJyIiQl5zLDk5mWvXrsn7UVNTQ01NjcTExFzTQwF5Mf9ChQrh4ODAixcvWLJkCZUqVcLBweGrTysTfh5paWmEhIQQGRmJhoYG5ubmlPgEN8DXrl3LEQx7271794jMvvn/ylq1akWjRo2YMWMGCxYsQEdHh7i4OEJDQzE0NOTly5e8fv2a69evy6c/C1/IwIGyAv3u7kiiosj+63Td3JxLtrY5mtb39UUtLY1V3bszYMAAtFJSZFMGU1Lg+HFZYXn4f9bZgQOymmHFiskK6YNsVcY5c8iQSskoWxaNf/sOrVKFVE1NlDIyaH34MAAZamooKSlx+9YtqqSloQxca9aMp+XLUyg8HCtvbzSAVImEbW3aYPTmDZY3b6L872v/rapVSf7P63zJR48wKlwYOnWSTT29e
xdateJQ794MatuW/m9nsY0ZI8sO69BBFkALCYFff5VPr6RUKZg4EVq2hF9+kbWZM0fW98mToKGBj58fl1NSGP/oEe0DAjhboQLqqam08fNDNftvdXKywh9NRkYGN27c4MGDB0gkEkxNTalWrZqYXikIgiAIwg9PBMR+EI8fP8bDwyPHNMjy5cvTsWNHeR2tt0kkEuzt7bGxsSEqKgplZWVKlSpV4NpXurq6aGhokJLHalwVK1Zk//798uyqtz/i4uLy3J729jST/1BWVkZdXR2pVEpqaippaWlIpVIyMzNJS0vLtWplVlYW4eHhREdHc/XqVaRSKenp6TnaSSQSDAwMqFKlCvr6+kilUmJjY3n27BmxsbGALMiQ37gASpYsSZ8+feRTx+Li4rh8+TL37t2T3dAp+BkIwqf08uVLtmzZkiNwdf78eerXr0/jxo0/qu/w8PB89z958uSbCIiBrG7g33//TY8ePRg8eDD+/v5kZWURGxuLpqYmGhoa9O7dm/nz51OhQgXq169P8feZIih8GGVlMDODFi24VqkSz4OCaHrmDBYhIYRWqcKdKlXkTavcvs1dMzNe6eoydcoU+np4UDk+nldaWkT99htrTU1ZDryKjkY/I4Nd+vpMPXqUSgkJHAYiJBJKZWRgoaJCCKAOPAMKASb37xNRujSNT51CNT2d7Ld1st+YkUilJGlp4VaxIjt27CAjI4PKGhr4JyejmZbGheBgtNq14361agz75x9ANs1SLSWFNHX1/0/TzMyUTWe8dUu2SMGUKST17s2QIUNo2bIlEh8fSEqSrYh55Aj06CE7btgw2UqdQUHg4CBb+XLhwn8fmCrQrp3sa2trWdbdhg0E2drSpk0bbOvXZ3S5cphv3ox59t85R0dZP6tXy6Ze/kdycjJbtmzJsVBNSEgIV65coXv37l9tOrQgCIIgCMKXIAJiP4CEhAS2b9+eK2jz8OFDPD096ZH9j7YC6urqlCtX7r3PqaysTJ06dTh37pzC/W3atKFsQWvLvCU1NfW9Amj//T5729uPRUZGRo7MMUCeGZK9MubLly/zHdfb7RVxdHRUeOPw6tUr/P39afZW0WVB+NSkUikeHh4Ks7h8fX0pU6YMpqamH9z/uxbsUHtruti3onr16vj4+LBhwwbGjBlDWloaffv2pdi/KwAmJydz8+ZN7ty5Q69evShduvRXHvEP7u+/YelSuHePx2fOEB0fD2fO8NLAACcvL+6amSFVVkY3Ph6dpCQi/w2w1t66FfOXLxmgpkazjAyavXzJk39/39L/fV2+VbkyjqVLYyKVwp49lPg3+2pJnz48adAANTU1khYtQjcoiE4eHgBIkQWy0lRUyFRWptCrV6j9+3cjU1mZclpaLFmyhF69esnGb20NgYGsGzwYbzs7fHx8SNTRoVB8PCOXLkU9LY00VVVCK1fmRPPm1FyxAg4fhm3b5HXP/jh4kLi4ONasWYOkSRO4d08WQAsK+n9ALHuFylevZJ89PP4/FTR7hUuQLRygp0f80aM0nzaNSpUqsffgQVR0dWWP9d27slpwZmbQrZtswQEFrwGnT59WuGrz06dPOXv2LM3/M51VEARBEAThRyICYj+AoKCgPDOYHjx4wI0bN9DX1ycrKwupVFrgz+9qo6qqirGxcY7sEWVlZSpUqMDdu3e5c+fOe53vXeNQV1fH0NAQAwODdx6TlpZGSkoKycnJJCcny79OSUnJ8ZGcnExiYqK8XWpqKunp6aSnp5OZmSkPgOU3PVRdXZ3y5cvnuf/OnTsiICZ8VtHR0QpvarNdvXr1owJiVatWldfY+y9lZWUqVar0wX1/TkpKSvTv35+2bdsybdo0eTDsbRkZGRw7doyBAwd+hRH+RFaulBWl19FBX1+f6H83R5csifmNGxR+/ZpXhobo/JvlnKirS4OzZ2n48iWnmzVDq3JltE6exCA0lML/Tk98np5OMUD3wgX+vHBBfqrXWVkYAkGnT3MqOpoiR
YrQNCyMJ+XKEdCzJ8/PnWPovzXC1DIyKPriBaOXLpUfr5eQQHtPT7bq6PDs2TNZBmFSkmzn48dYjxjBpUuXkEilJGto4OXkRKaKCmUeP8YmIIBKDx6g/uYNLFkCrq4wfDhZSkos2rqVlStXUrp0aTItLVG+d082VTIy8v+PU/bXRYvKPj97JvuspJRz1UuplKyMDM6ePk2RsmXx8vJCN3uKZfHi/18YITNTVg+tTp1cGWKZmZlcv349zx/ZtWvXcHR0FFOLBUEQBEH4YYmA2A/geXaB4Tx07dqVmzdv5rlfQ0MDa2trypYtS2ZmJrdv3yYkJCTXFMS86OnpUbp0aTIyMnj48OE7pxd+LCUlJSQSSb6fgRz/xEskklxBrexg238/srKykEgkKCsr5wiK5eVdNwtPnjzh0KFDtG7dWtxYCJ9FXvW9Crr/XSpUqECNGjVy1NbLJpFIvvk6eUWLFsXS0pLItwMPb4mMjCQhIeH/AQXh03v2TBacASwsLAj997VQ8u/fGaV/P6v+m81b9tEjagUFIR01iiZLltAE4I8/YOZMNq1ZA40bU9XJCby86LVkCXdUVEh5+pTyGzdiEBMDUimFTE3R1NTEMDiYYq9eMT4tjUUzZqApldIZUNXUpFByMgHW1jwwMaHOpUuUfvoUSVYWugkJGD95wnN/f4oHB8syrgAyMtDV1aV3lSroJCURUr06NywsALhdtSpFpFJML1+WTWccNQqA9IQEMqRSHBwcaNOmDZs2bUJdIqErkPbmDWkvXqADkJUFGzaAgYGsaH724wZgawsVK8ofzvitW9F784abhQtz4sQJirydPfa2BQsgKgqWL8+1K/sNoLwkJyfLFhoQtcQEQRAEQfhBiYDYD0BHQV2Qt/3+++8UKlRIYfAoLS2N4ODgHLXAKleuzIABA7C2tkZFRaVAAShFn9+nLUBSUhJv3rwhMTFRXlg/++uEhAQSEhJITEyUf/3fj/j4ePnX+f2TD7I6Qzo6Oqirq6OqqoqSkpK8xtibN29ISkp658IB2TQ1NfO9mb59+zbz589HWVkZGxsbxo0bh7OzMyoq4uknfBqGhob57i9UqNBH9S+RSGjXrh3lypUjKCiIhIQEihYtyu3bt/nzzz+xtLTEycnpo87xuWUvfpGXgr4BIHwgMzNZAfiXLylkaEi1atUAKBURQaqaGq8MDABI//d1sWZQEK9bt6bw4sX/7+P8ednnnTsBkPz7d6vY+vUU8/GBQoVkNbjKlQOplAH16zNAQ0OWIWVuzoKAAOaqqDDAxIQijx+ToKpKgrIyp5o1I01dncqhoZSKiGB/+/a0PnyY/uvXw/r1Oa/j0SMYPpySGzYgLVwYpWnTuL9vH2XLlmVoyZIYXb4sq5cWEwNbtwIQdvYsplIpiyZOZNOmTbIp/JUrE1a+POUfPiQhMJCkadMofuEC+PnBmjWyxQJ8ff9f++vCBejbF2xtSbt1C7Xly4lWUqLT0aP/Xwhn61bZFEsHB9kxp07JVtwcMEBWsP8/NDU1UVdXV7hYDICBgYEIhgmCIAiC8EMTd+Q/gJo1a3Lx4kWF+wwNDencuXOemUm7du1SWBj/5cuXZGVlYW1trfC47ML2eQWn/hugele7pOzpKHnIXl1S0UfJkiVzbVNTUyM1NVUeTHv16hUvX77k2bNnREVF8eTJE2Ky67KAPCPs7Vpj2atjKikpER8fT0pKCioqKlhaWtKgQQPq169PvXr1MDQ0JCwsjK1bt+YKoGlqatK7d28ePnzIzZs3uXjxIp06dZIHx4YNG0aHDh3kq2YKwocoWrQo5cuX5+HDhwr3L1q0iBIlSuDg4JBj+6tXr4iNjaVQoUJ5Z5j8SyKRULNmTWrWrCnflpWVRXBwMK6urly+fPmbnToJsiy3Z9nZNv9hYGCAnp7eFx7RV/LPP/D69f+n5h06BNnT3keMkAWVHj+GLVtk265ckX3+6y/Z57JloWfP//d36BBkZw6mp8tWksxu2
7Yt/Js9xaRJsjpZderAoEHY/vv6qx8Xx5nGjcn6N/CinZgIgFRTk8Lt28tqcGXz85N9dnOTff53JWOuX4fu3cHJSRb8yg5uzp0rW+GxVSvZipQTJyKtVIkhjx8jBbTevGFbt26yYvj/UsnIoGZwMJfq1EElMxNbQ0NUDx2S9SmVwrlz8OABuLoimToV83LluDRpEkZGRhhlZ2tnZsqK6f/7OGU/KxJPnCBDX1/2jUTC7k6dmDhvHrqJiSjPno20ShUkW7fKruXaNdnjZ24Onp6yVTOPHEG6YwdvsrI4paxMlX37qPb2Cp1mZhAbCzNnylaUrFRJFlAbNCjXr0FycjLTpk3jwoULNG3aNNd+ANv/rP4pCIIgCILwo5FIC5ICI3zzLl26xLFjx3Js09bWpkePHpQoUULhMampqcydOzfPLKjU1FTu3r2bZxDrv4Xq/6tmzZrY29tTpEgRMjMzef78Oc+ePUNDQwNdXV309PTyDHJlf2S3UVdXlwf10tPTiYyM5MmTJzx9+pSnT5/m+jp7hchsOjo6qKqqkpWVRWJiojxbRFlZmXLlylGpUiV0dHRITk4mIiKC27dvk5ycjJaWFnZ2dtSvX5/69etTp06dPKeHPXz4kDNnzhAeHo5EIsHMzIymTZvKAw2xsbGsWrWKJUuW8OLFC/lxSkpK1KpVi0GDBtG5c+ePzuYRfk6JiYls3749Ry0xJSUlrKysmD17Nv7+/owYMYI5c+YglUo5cOAA9+/fl7ctW7Yszs7OFC5c+L3OGx8fj62tLZmZmVy6dOm9j/9SEhIScHNzI/HfgEs2qVRKixYtfp6b/3LlZAEvRR4+lO0/exYaNVLcpkED2f5sffrApk2K227YINuf7fhxmDMHbt6UBeUyMnjTqRNzq1QhOjpatvLxzZtUnjs37/FXqSILNmVLT4fZs2XnioyUBewcHGSZXbt2QefOsnbz5sGGDWSFhUFaGhmFCpF2+DAr/P1zvClU++JFzENCMIiNRSMtDaVixaBJE9DQgLVr4c4dWeDpLRUrVqR9+/bMnTtXFjQrXhwsLUk9eJBatWrRLC2NJffvs717d+69Ne3R+MkT+q9fzz4XF67XqMGoUaNkz58HD8DeHvT0ZEHAf+uJZWRk0KlTJ44dO8aJEyeoX79+3o9TPnx9fenfvz9Pnjzhjz/+oEaNGlzJDnwiC37b2dnRtGlTMc1fEARBEIQfmgiI/UCeP3/O9evXSUxMpESJElhYWOSbeRQfH8/it6ej/EdycjJBQUEKA1TvCmKFhYVx4MCBXH0WKlSIQYMGoaWlpfCcWVlZxMTEyINbioJdUVFROYJ4enp66Ovry681OTmZV69eER8fL29jZGREpUqVMDMzo1KlShgZGfH69Wvu3r2Lv78/gYGBZGRkYGBggL29vTwAVqtWrXeusPdf6enpKCkp5TnVRCqVcuHCBZYuXYqnp2euwKKFhQX9+/enc+fOeQYzBUGRrKwswsLCiIyMRENDgypVqqCrq0tmZib//PMPkydPxsjIiCFDhuQKDIEso3Tw4MHvPZ333r171K5dm7p163Lw4MFvdprVq1evOHnyJHfu3CErK4siRYqwefNm1NXVOX369Hs/14WPcOUK2NjAhg10P3mS8PDw/69aPGSILMh25w5kr/55+jQ0bQqrVsHgwbJtz5//v/j829q2la3wePdurpUVfceNo/6iRSQtW4b2iBFERESwZ8+eXHX2TE1N6dq1q+x3+d49WU2vBg1kGXHZ/j1/xYoV6dChA3///bds8YBhw2DRIv549Yq///6bq/7+VGvRgntFirCjWzf54c6enlS5fZvFY8aQoqXFyJEj0U9NhXr1ICUF/P1lAUpkfzf69+/P5s2b2b9/P61bt37vhzwhIYFJkyaxcuVK6taty7p166hcuTIge26EhYUhkUgwMTERb8wIgiAIgvBTEAGxn5hUKmXx4sUk/Luq139VrVqVTp06vXe/WVlZLFmyJM9+K
1eujK6ursIMr/Dw8BxF+TU0NChdujTFixdHW1sbZWVl0tLSiIuL49mzZ4SHh8tr/+jo6FCpUqUcgS8zMzPMzMx49eoVvr6+8o/sRQaMjY1xcHCQB8CqVKkiL8r/Jbx+/Zpt27bxzz//EBoaKi/kn61SpUr06dOHzp07U6FChS82LuHHdP/+fX799dcc0x7/y9nZmRo1arx338ePH8fJyYmJEycye/bsjxjl55eZmUlmZiZqamr4+/vTqFEjBg8ezLJly7720H58b0/ZXLUK2rdnzeXLFC1alPbe3rIpm0+fgqUlFC4sK0yfmAjz54OxMQQEyOprAYweLQsatWgBZcrIpgt6eMjajBgBCn6eNypXxvTOHTRev5adC9nvw927d+VTaks5OvLI2ppm/fvLsuZWrQJdXdm5SpX6f2daWtClC7MPHaJqrVo4Fykiq29WowbXV63Cqn59fvvtN6ZPny4PlN2sWpUHJiaUefKEmteucbpxY/wcHDA0NGTYsGFILC1l0yUnTJBNl0T2t3rHjh1sOHqUPlu30r179/d+2I8fP86gQYN48eIFc+bMYdiwYd9s4FoQBEEQBOFLEQGxn9zly5c5evRoru1KSkr069ePUm//819Az549Y/Xq1Xnuf/r0KevWrUNZWRkjIyPKlCkjD3qpqamRmZlJYmIiz58/59GjR9y7d0+ezaKsrEyFChVyBb0qVapEiRIl5KtJ3rlzRx788vHx4fG/U4QqV64sD37Vr1+fsmXLfhNTQqRSKZcvX8bd3Z3t27eTkpKChoYGycnJ8jblypWjR48edOrUCXNz829i3ML358yZM/j6+ua538bG5oML5C9YsIBff/2VnTt30qVLlw8d4he3evVqhgwZwsaNG+ndu/fXHs6PrSBTNkE2rXLsWNmUQTU1WR2whQtl0xGznTwpC3pdvSrL1tLQkNUsGzAAeveG/75GxseTqq9PQJEi2OdRTw4guEoVity5QylVVSRFisgyzv78E4oVy9lw4EA4f57E27fRkEhQqVABOnQgY+JEbJs1IzU1lcDAQPnftZ1NmuB44wb6r18Tr6fH5dq1uWRrCxIJnTt3pkqVKrnH/JYIU1NK3buX92OrQGxsLGPHjmXTpk00adIEd3d3ypcv/159CIIgCIIg/KhEQOwnJ5VK8ff3x9fXV56ZpaenR6tWrTD7T52Ugnr+/DkrV67Mc7+mpibFihXj+fPn3L9/n7t373Lnzp0ctY+KFy+uMOhVvnx51NTUcvSXkZFBcHCwPADm5+fH8+fP5bW5soNf9vb2FFU0veYbEx8fz/bt23Fzc5NPWZVIJDmmgJYoUQJXV1c6dOiAnZ3dF81qE75v58+f5+TJk3nut7e3p0mTJh/Ut1QqpWfPnnh6euLv74+lpeWHDvOLkkqlDBw4kK1bt+Ln55fnYiLCp5eYmIiuri5btmyhR48en/185cuXp2PHjsyfPz/PNo8fP6ZChQosX76coUOHvrNPU1NTOnXqxJw5cwCYN28ekydP5sKFC9SuXZvU1FR69OjBvn37WLlyJSoqKjx9+hSAYsWK0bhx4zwXpMgO1k6fPp1p06a917V6enoybNgwkpOTWbhwIf369RNvpAiCIAiCILxFBMQEANLS0oiKipJnbX1MgCUrK4ulS5fmCOC87eTJk/j7+6OlpSWf0vh20KtixYr5FuZOTk7m8uXL+Pj44Ovry4ULF0hMTERDQ4M6derIA2B2dnbo6up+8HV8CwIDA3Fzc2P79u0kJSVRoUIF+cqA2dlwBgYGdOzYkfbt29OoUaNcAUPhx/L8+XOuXbtGYmIiRYsWpWbNmnku9KDI69evWbZsWZ6LaRgbG3/UjXNycjL169fn+fPnBAQEUOy/WTXfqJSUFBo0aEBUVBRXrlz5bsb9vbtx4wbm5ub4+/tTt27dz3quhIQE9PT02LRpE7169cq3badOnQgJCeHWrVvv/Hv4dkDs7t271KhRg2HDhrFgwQKSkpJo3
749Z8+eZffu3bRr1w6QPU+ysrLyfe7u2rULV1dXRowYwZIlSwr8nHz27BnDhw9n7969tG3bllWrVmFkZFSgYwVBEARBEH4mIiD2k8nKyuLKlSsEBweTmJhI8eLFsbW1xcTE5L37SkpK4t69e/IMr7c/FytWDFdX11w3EsrKytjY2FC1alVKlSpVoMDb69ev5Vlsvr6+BAQEkJ6eTqFChXIUwLeyskI9u7bMDyYxMZGdO3fi5uZGQEAARYoUwdTUlPv37/PixQt57TEdHR3atGlDhw4daNGixXsFSoRvn6LVZDU0NHB1daVMmTIF7ievLDEfHx/OnDlDmzZtWLt27QcHhcLDw7G2tqZSpUqcOnXquylWHxERgZWVFZUrV+bkyZPfzbi/ZwcPHqRdu3ZERUV99kVELl68iJ2dHVevXn1n9qKfnx/169fn6NGjtGjRIt+2JiYmdO7cmVmzZsmDqtevXyctLY1WrVpx7do1Dh48SOPGjQs81mPHjtG2bVu6dOnCpk2bCvS3UiqVsm3bNkaNGoWSkhLLly+nS5cuIitMEARBEAQhDyIg9hORSqV4eHjIC8q/rU2bNtSqVSvX9szMTB4/fiwPdr0d+AoPD5e3K1KkSK5MLwMDA548eUJERATq6upUq1aN+vXr57vyJUBkZGSOAvghISFIpVJKliyZo/5X9erVf8qiwMHBwbi7u7N161bi4+OpU6cORYoU4fLlyzx//hxVVVXS09NRV1enefPmuLi40KZNGwwNDb/20IWPkF9tPh0dHUaNGvVeq0OGhYUREBBAbGwshQoVolatWnh7ezN27FjS0tLQ1dVlw4YNuLi4fNB4s4vVDxw4kBUrVnxQH1+Dr68vjRs3Zvjw4fmuwit8GkuWLGHKlCkkJSV99sCNu7s7gwcPJjEx8Z1/h6RSKdbW1hQtWjRXEPq/TExM6NKlC8bGxgwbNgxvb2+qVKlC8+bNefLkCUePHqVOnToFHuf58+dp2rQpTZs2xcPDo0CB2adPnzJ48GC8vLxwdXVl6dKl30WJAEEQBEEQhK9JBMR+Ivfv32fbtm0K96mqqmJvb8+DBw9yBL3u378vry2mrq5OxYoVFdb2MjAw+KAxSaVS7t+/nyMA9uDBAwAqVqyYIwBWoUIF8U73W5KSktizZw9ubm5cuHCBYsWK0bRpUyQSCUePHiU2NhZ1dXVSU1NRVlamQYMGuLi44OzsjLGx8dcevvCejh07xqVLl/LcLy/K/ZEePXpEr1695IX3u3TpwurVq/OdxpwXd3d3Bg0axJo1axg0aNBHj+1LWbFiBcOHD2fz5s307Nnzaw/nhzZy5EhOnz6t8I2az3GuEydOEBoaWqD2W7ZsoVevXty6dSvf55aJiQktWrRg8+bNdO/enSlTptC0aVMSExM5ceIE1atXL/AYr1+/ToMGDbCwsODYsWPvDNxlZWXh7u7Or7/+iq6uLqtXr6ZNmzYFPp8gCIIgCMLPTATEfiKHDx8mMDAwz/07d+4kNDSUMmXKKAx6lS5dWp6RJZVKefr0Ka9fv6ZIkSKULFmyQMGqzMxMrl+/nqMAfnR0NBKJhBo1auQIgH3u6TM/khs3buDu7s7mzZt5/fo1jRo1om7dukRFReHp6cnr16/R0tIiJSWFrKwsateujYuLCy4uLnkWcxa+Lbt37+b27dt57m/ZsiW1a9f+JOeSSqW4ubkxevRoUlNTMTAwYPv27Tg6Or53X8OGDcPd3Z0zZ85gb2//Scb3uUmlUvr168fOnTvx9/dXmD0rfBqtW7dGIpFw6NChz36uRo0aUaRIEfbs2VOg9qmpqZQtWxYXFxdWrVqVZ7sKFSoAkJ6ezr59+3BxcUFVVZWTJ0++VzmCBw8eYG9vT8mSJfH29qZQoULvbD9gwADOnj3LgAEDmD9//gcFrgVBEARBEH5WIiD2Ezlw4ADBwcF57rexsaFhw4ZoaWnl28/z58/Zs2cPz58/l28rXbo0HTt2RE9PL0fb1
NRUAgIC5AEwf39/4uPjUVNTw8bGBgcHB+rXr0/dunXf+c+/8G7Jycns3bsXd3d3fH19KVKkCD179qRKlSr4+/vj6ekpLyydnJxMeno6VapUwcXFhfbt21OrVi2RhfeNOn36NH5+fnnuv3fvHq6urrRo0eKTTSV+8uQJPXv2xMfHB4B+/fqxdOlSdHR0CtxHeno6TZs2JTQ0lCtXrlC6dOlPMrbPLSUlhfr16xMTE0NgYCBFihT52kP6IVWrVo2mTZuydOnSz3oeqVRK0aJFGTlyJH/88UeBj5sxYwZz587l6dOneWZCZ6+avGTJEv766y+KFSvGyZMn36uQfWRkJPb29qiqquLr65tv/b7MzEyWLl3K1KlTKVGiBO7u7h+8MqwgCIIgCMLPTATEfiLXrl1j//79CvdJJBLGjBnzzlUZ09PT+eeffxSuIFmiRAm6du3KhQsX5AGwy5cvk5qaiq6uLnXr1pVnf9WuXRsNDY1PcVlCHm7fvs3atWvZtGkTL1++xMHBgT59+qCjo8O+ffs4cOAAb968wcDAgJSUFN68eUOZMmVwdnbGxcUFe3v796pJJXxer1+/ZsWKFWRkZOTap6ysjIeHB8HBwZQuXZr+/fvTr1+/TxJ8kkqlrFu3jhEjRpCamkqJEiXYvXv3e2V7PX/+HGtra4oUKYKvr+87g+7fiqdPn2JlZYW5uTnHjx//7p4PWVlZxMfHo66u/s6pd1+DVCpFS0uLv//+m1GjRn3Wc0VHR1OyZEk8PDxo3759gY979uwZZcqUYebMmUyYMEFhv6VKlaJMmTLExsZSqVIljh49+l41G2NjY2nQoAGvX7/Gz8+PsmXL5tn25s2b9O/fn8uXLzNy5EhmzZolFk8RBEEQBEH4QCIg9hPJyMhg3bp1REdH59pna2tL8+bN39lHcHAwBw4cyHP/li1bePDgAcWKFcsx/dHCwuK7u5n8UaSkpLBv3z7c3d3x9vZGX1+fXr160aNHDx49esTOnTs5fPgwqampFC9enJSUFOLi4ihSpAht27bFxcWFpk2bigDmN+Du3bt4enqSmpoq32ZoaEi3bt3Q19cnMDAQd3d3tm/fzps3b2jZsiWDBg3Cycnpo59/T58+pXv37vLaYiNGjGDevHkF/r0IDg6mbt26uLi4sHXr1u8mE/HcuXM0adKEUaNGsXDhwo/qKyMjg5SUFLS0tAq0auCHkkqlBAQE4OfnR0JCAiCb1te8efMPXjn0c4iKisLIyIgDBw7Qtm3bz3qukydP4ujoyN27d6lYseJ7Hdu3b19Onz5NWFhYrudRx44d8fT0RElJCQcHBw4cOPDON5belpSURNOmTbl37x6+vr551ipLT0/n77//ZubMmVSoUIH169dTt27d97oOQRAEQRAEIScREPvJvHnzhpMnTxISEkJmZiba2trY2tpSr169At2gHj9+nIsXL+a5v1ChQrRs2RIzM7Pv5ob3Z3L37l3Wrl3Lxo0bef78OfXq1WPgwIG0aNGCU6dOsWvXLo4ePUpGRgalS5cmNTWVmJgYdHR0aNmyJe3bt8fJySnX1Fjhy0lNTeX27dskJiZStGhRKlasmCu4kpCQwM6dO3F3dycgIAAjIyP69etH//79KVeu3AefWyqVsn79eoYPH05KSgplypRh3759Ba6xtWvXLrp27cr8+fMZP378B4/jS1u2bBmjRo1i27ZtdOvW7b2PT0hI4PTp09y4cYPMzEw0NTWxsrKiQYMGn+WNAl9fX86cOZNru6amJoMGDfpm6kz5+/tjb29PSEjIexWe/xCLFi1i6tSpJCQkvPeU4uDgYCwtLdmzZw8dO3aUb/fw8JB/b2ZmxrVr197rjYPU1FTatm3L+fPn8fb2xtraWmG7wMBA+vXrx82bN5k4cSK///67eINCEARBEAThExABsZ9Ueno6KSkpaGtrv1emgr+/P6dOncpzf/v27TE3N/8UQxQ+o7S0NA4cOICbmxunTp2iUKFC9OzZk
4EDB1KmTBn279/Pzp07OXnyJFKplAoVKpCRkcHjx49RU1OjSZMmuLi40K5du28q40TILTg4GHd3d7Zu3UpCQgKOjo4MGjSINm3aoKqq+kF9hoeH061bN3x9fZFIJEyYMIGZM2cWqL8pU6Ywd+5cjhw5QosWLT7o/F+aVCqlT58+7Nmzh/Pnz1OzZs0CH5ucnIy7uzuvXr3Ktc/U1JRu3bp90jcPUlJSWLRoEenp6Qr3W1tb06pVq092vo+xdetWevbsSWJi4mef9te3b19u3LhBQEDABx3foEEDMjMz5XX8YmNjKVeuHAkJCWhrazNy5Ehmz55d4P4yMzNxdXXlwIEDHD16lMaNG+dqk5yczJ9//smCBQswNzdn/fr1WFpaftD4BUEQBEEQhNw+35wN4ZumqqqKrq7ue0/bMTc3z/PddU1NTbFi4XdCTU2NTp06cfLkSe7fv8/QoUPZu3cvNWrUoEWLFmRlZeHh4cGzZ89YvXo1ZcqU4cmTJygrK2NiYsLDhw8ZPHgwJUqUwMHBgcWLF/Po0aOvfVmCAjVr1mTFihVERkaybt064uLi6NChA6VLl2by5Mk8ePDgvfs0Njbm3LlzrFu3DjU1NebOnUuVKlW4devWO4+dOXMmLVu2pGvXrty9e/dDLumLk0gkrF69Wr4AxcuXLwt87MWLFxUGwwDu37/PvXv3PtUwAVmwMq9gGEBYWNgnPd/HCAsLo3jx4l+kBlZISMhHvVkzevRo/P39uXLlCgCOjo4kJCTQt29fDA0N3+tvqVQqZciQIXh4eLBr1y6FwTA/Pz9q1qzJ4sWL+fPPP7l8+bIIhgmCIAiCIHxiIiAmvBc9PT3atm2b659/VVVVOnTogJqa2lcamfChTExMmD17Nk+ePMHT0xN9fX0GDBhAyZIl+f3337G2tubMmTNERkayZMkSDAwMCA0NRUVFhZo1a5KUlMSkSZMoX748lpaWzJgxgxs3biCST78t2tra9O3blwsXLnD9+nU6d+7M6tWrMTU1pWnTpuzevZu0tLQC9yeRSOjXrx9hYWHY29vz4MEDzM3NmTFjBpmZmXkep6yszLZt2yhRogTt2rVTuEDHt0hTUxNPT08SExPp2rWrwsUNFAkNDc13/+3btz/F8OTelW32PtloUqmUpKQkkpKSPnZYCoWFhVG+fPnP0vfbMjMzuXXr1kdNy2zbti3lypVjyZIldOnShcDAQNq0acO6deuQSCTv9bhOmTIFd3d31q1bh7Ozc459iYmJjBgxAgcHBwwMDAgKCuK333774GxOQRAEQRAEIW8iICa8NwsLC4YNG4a9vT0WFhY0bNiQ4cOHY2Ji8rWHJnwEVVVVXFxcOHr0KA8fPmT06NEcPHgQKysrrK2tOXjwIL1798bPz48nT54wZ84cVFRUuHr1KsrKytjZ2aGjoyOf3mNmZsaECRO4cOECWVlZX/vyhLeYm5uzbNkyIiMj2bx5M6mpqXTp0gVjY2N+/fXX98rcMjIywsfHh3Xr1qGqqsq0adOwsLDINxOpUKFCHDhwgKioKHr06PHd/H6ULVuW3bt34+3tzZQpUwp0zLuCjAUNrBVU6dKlUVdXz3N/QQvK37t3jzVr1rBgwQIWLFiAm5sbDx8+/FTDBGQBsQoVKnzSPvM6T3Jy8kdliCkrKzNs2DB27NjB7t27qVixIgcOHHjv6a7z58/n77//ZuHChfTp0yfHvpMnT1K9enXWr1/PokWL8PPzo2rVqh885k9BKpXy4MEDfH19CQgIIDEx8auORxAEQRAE4VMSATHhgxgYGMjrSDVo0EAUWf/BlC1blhkzZvD48WMOHjxIyZIlGTJkCCVLlmTgwIFER0czZswYLl++zIMHD/jjjz948+YNfn5+SKVSGjVqRIUKFdi4cSN169bF2NiYoUOHcvLkyXyncwlflqamJj179sTX15ebN2/So0cP1q9fT6VKlWjYsCHbt28nJSXlnf1kZ4s9fPiQunXrcuvWLSpVq
sTChQvzzBSsVKkS27dv5/Dhw0ybNu1TX9pn06hRI+bPn8/8+fPZtWvXO9sbGxvnu79UqVKfamiAbDq0oil4ALq6ugVamfDevXvs2LGDZ8+eybdFRUWxdevWTzo1+ksFxEJCQgA+KiCWnp7OxYsXycrKQklJiePHj8uDYQXNhl23bh0TJkzgt99+Y+zYsfLtr169ol+/fjg6OmJiYkJISAijR49+7+L/n1piYqK8/uCZM2fw8vJiyZIlBAYGftVxCYIgCIIgfCoiICYIQp5UVFRo06YNhw4d4vHjx0yYMIHjx49Tu3ZtatWqxcqVKzE0NGTSpEkEBwcTGhrKr7/+SnR0NCdOnCA9PR0nJyfs7Ozw8vLC0dGRYsWK0bNnT/bt28ebN2++9iUK/6patSqLFi0iIiKC7du3I5FI6N69O6VKlWLMmDEFqg9WsmRJ/Pz8WLt2LcrKyowfP55atWoRERGhsL2TkxNz5szhr7/+Ys+ePZ/6kj6bESNG0LNnT/r168f169fzbWtnZ5dnfSltbe33KtBfULVr16Z9+/YULVpUvu327dt06tQJXV3dfI+VSqWcOnVKYZAnKytL4eqVHyIlJYXIyMgvFhArUqQIxYsX/6Djk5OTcXFx4cCBAwBoaWlRsmTJHG3elSnm6enJoEGDGDx4MDNnzpRv379/P1WrVsXDw0O+yMmXeEwKwtPTk6ioqBzbMjMzOXz4ME+ePPlKoxIEQRAEQfh0xCqTgiC8l8zMTI4fP467uzuHDh1CXV2dLl26MHDgQGxtbZFIJEilUm7cuMGuXbvYuXMnDx48oEiRIjRs2BBNTU2CgoK4ceMGmpqaNG/eHBcXF1q3bo2BgcHXvjzhLXfv3mXt2rVs2LCBFy9eYG9vz8CBA+nUqROampr5HhsdHU379u25cOECqqqqLF26lMGDB+cKHEilUrp168bBgwc5f/48NWrU+JyX9FGSkpI4efIkN27cIDMzk8TERK5du8bOnTvz/d29desWhw4dypFtl5qaysiRIylRosRnHXNaWhoxMTGUL1+eRYsWMWLEiHzbJyYmsnDhwnzbTJ48+aPrRd65c4fKlSvj7e1Nw4YNP6qvd+nYsSMvX77E29v7vY+Nj4+nTZs2XLlyhcKFC1O8eHGCgoLYsGGDfMpj2bJl6d27NzNmzFDYx6lTp2jVqhUuLi5s27YNZWVlYmJiGDFiBLt376Z169asWrXqndmEX1JMTAyrVq3Kc3/16tXp0KHDFxyRIAiCIAjCpycyxARBeC/Kyso4OTmxb98+njx5wpQpU/D29qZu3bpYWFiwfPlyXr9+jbm5OX/99Rf37t3jypUr9O3bl1u3bhESEoKBgQF9+/alb9++REdH07t3b4oVK0azZs1YuXIlkZGRBRpLZmYmycnJooD/Z2JmZsa8efOIiIhg9+7daGho0Lt3b4yMjBg5cqR8KpoiJUqUwN/fH3d3dyQSCUOHDsXW1pbnz5/naCeRSFi3bh2VKlWiXbt2vHjx4nNf1gdJS0tj48aNXLt2Tb5ogI6ODvXq1WPs2LH5LiRQtWpVxowZQ/v27XF0dKRo0aLMnTs3z9UnPyU1NTWMjY1xcXFh5cqV73yuFKQm1vvWzVIku8bcl8iGunHjxgdNl3zx4gWNGzfm2rVruLi48OLFC7Zt20arVq1YsmSJ/LHM7zG9dOkSzs7ONG7cmM2bN6OkpMS2bduoWrUqp0+fZtu2bRw8ePCbCoYB73wefqvPU0EQBEEQhPchAmKCIHwwIyMjfvvtNx48eMDx48epVKkSY8eOxcjIiF69euHn5weApaUlDRs2pHPnzvKbQ2NjY27evElkZCSDBw9m/PjxSCQSRo0aRalSpbC1tWXu3LkKC7y/efOGQ4cOMXfuXObNm8eiRYvw9fX9boqzf2/U1NTo1KkTJ0+e5P79+wwePJjdu3djYWGBnZ0d69evV7gSoUQiYcCAATx58oQ6depw+fJlSpcuzaZNm3K009LSY
v/+/bx584ZOnTp9k3XmgoKC8gwClCxZkqlTp+Z7vJqaGubm5tjZ2dG/f39KlCjBggULPsdQFRo6dCihoaHvzJLS1tbONR3wbWXLlv0kKx6GhYWhqqr6yWuo/VdycjL37t1774BYeHg4Dg4OPH36lJUrV7Jjxw6mTZtGlSpVGDVqFNeuXcPHx0feXlGQ8ObNmzg5OVGjRg08PDyIiYmhTZs29OjRg6ZNm3Lr1i26dev2SQKMn9q76oKKuqGCIAiCIPwIREBMEISPpqSkhKOjI3v37iU8PJzp06dz/vx56tevT7Vq1Zg3bx4BAQE5jlFWVqZRo0Z06NABDw8P5s6dS1hYGCNGjOCvv/7CyMiIP//8k0qVKlG9enV+//13rl69SmpqKps2beLq1avywEliYiJnzpzh0KFDX+PyfyomJibMmTOHp0+f4uHhQeHChRkwYABGRkYMHTqU4ODgXMcUL16cixcv4ubmhlQqpU+fPjRo0CBHhlSZMmXw8PDAz88vR8Hxb0V+KyxqaGiwY8eOAtdBU1dXZ/To0WzZsqXA2ZAfq0GDBlStWpUVK1a8s23Tpk0V1j1TVlamSZMmn2Q8Dx8+pFy5cp+9cPzt27fJysp6r4DY/fv3sbe3JykpiTNnzjB37lwsLCz49ddfAdnjU7VqVZYsWZJnH48ePcLR0ZFSpUpx6NAhtm3bRrVq1bh69Sr79u1j586dFCtW7GMv77PR1NTMt8ajpaXlFxyNIAiCIAjC5yECYoIgfFLFixdn4sSJ3L17l9OnT2NhYZHv1DATExMiIyM5ceIEDRs2ZMOGDUydOpXQ0FDGjBnD8uXLqVWrFv/88w9WVla4uLgQExOjsK/g4OBcRaCFz0NVVZX27dtz9OhRwsLCGDlyJPv378fS0hIbGxvc3d1JSEjIcczAgQN5+vQptWvX5ubNm/Tr1485c+bg7u6Or68vderUYfny5fzzzz+sW7fuK12ZYu8K3DRq1Ii+ffty48aNAvU3aNAgNDU1Wbp06acY3jtlT1s9cOAA4eHh+batUKECPXv2zDGNL7tOVunSpT/JeL70CpPVqlUrUPvr169jb2+PpqYm/v7+eHp6cvPmTdavXy/PjMvOZD1w4ABhYWG5pkw+e/aMZs2aoampyZo1a+jYsSODBg2iY8eO3Lx5E2dn5096jZ/atWvXsLGxwdPTU2E2YJ06dahUqZLCYxMTEzl79ixbt25lz5493Lx5U0xpFwRBEAThmyWK6guC8Fm9evWKZcuW5dtm6tSp8oBDWloaJ0+eZNeuXezfv5+EhARq1KhBx44dKVu2LKGhofkW9G7YsCENGjT4pNcgFExGRgZeXl64ublx9OhRtLS0cHV1ZdCgQVhZWcmnhj158oSNGzfmulEuUaIEffr0YdSoUaxfv56zZ89St27dr3EpuVy/fp19+/Yp3KelpcUvv/xC/fr1SUpKIiAgAH19/Xf2OXHiRFavXs2TJ08oVKjQpx5yLvHx8fJVQ/MqAP9fFStWpHPnzsyaNeuTjqVmzZrY2dnlW7j9U/j111/Zu3dvvhl+2S5cuICTkxMVKlTg2LFjPH/+nJo1azJ+/Hhmz56do+2bN28oXbo0vXv3Zs+ePfTv35/p06fz+vVrGjZsyLNnz+jfvz+LFi2iWLFiuLm54ejo+Lku85PZsWMH/fv3p3Llynh6elKiRAmCg4OJjIxEQ0MDc3PzPIOi0dHRbN68meTk5BzbK1euTKdOnfJcbVUQBEEQBOFrEf+dCILwWWlqauZbIyctLY2ZM2dy+/ZtQFZrqVWrVmzevJmYmBg8PT2pXLkyc+bMoVevXrmyjv7ryZMnIiPhK1FRUaFt27YcPnyYR48eMX78eI4ePYqNjQ21atVi1apVvH79mkOHDin8GUVHR+Pn58eyZcuoU6cO7du3f2c205dSvXp1ypcvn2t7VlYWJ06cIDExkX379hEbG0v37t3zLbKfbdSoUaSkpODm5vY5hpyLnp4ePXv2x
N3dnbS0tAIdk5CQ8M4VRd+XVCr9ohliBZkuefLkSZo2bYq5uTlnzpzBwMCAfv36YWJiwh9//JGrfXYQNCIigs6dOwNw5swZ2rVrx8OHDylWrBizZ89mwIAB3Lhx45sPhmVkZDBu3Di6detGx44d8ff3p1y5cmhoaGBra0v79u1xcnLKMxgmlUo5cOBArmAYQGhoKFevXv3clyAIgiAIgvDeREBMEITPSkNDI8/pNSArer148WKqVq1KjRo1mD17Ng8ePJAf6+Liws6dO4mJiWHnzp3vLLg+bdo0KlSowOTJk7l27ZoIjn0lpUuXZtq0aTx69IgjR45QtmxZRowYQc2aNfNdoe7UqVNkZmayd+9e1NTUcHFxUXiT/aUpKSnRrVs3mjRpQtGiRdHR0cHMzAxHR0dCQ0Np0KABqqqq7Nq1i+PHjysMovyXkZERPXv2ZMmSJaSmpn6Bq5AV14+Ojs4z2+2/UlJS0NDQ+KRjePnyJQkJCd9MQMzDw4NWrVrRsGFDjh07RqFChVi2bBmXL19m3bp1Cq//8ePHaGlpUbVqVfT09JBIJPj6+mJmZkZaWhopKSn4+PiwbNkydHR0PtflfRLPnz+nefPmLF26lGXLlrFp06b3DoLGxMQQHR2d5/5r16597DAFQRAEQRA+OREQEwThs2vRogWFCxfOtb1EiRLMmjWLZ8+esX//fqpWrcqsWbMwNTWldu3aLFq0SJ4hpK2tTZcuXViwYAFaWloKz6OqqsqiRYto3rw57u7u1KxZk2rVqjFz5kzu37//OS9RyIOysjJOTk7s37+fJ0+eMGDAgHzbZ2VlUbx4ca5du8b+/fu5ceMGv/zyyzcR2FRRUcHe3p6hQ4cybtw4XF1dqVevHj4+PmRkZODg4ICpqSlz5sxh9uzZeHh4vLPP8ePHExkZyfbt27/AFcgy3RwcHFi5cmWB2qekpHzyDLHs6YufOyAWGxtLZGRkvgGxDRs20LlzZzp06MC+ffvQ0tLiwYMH/Pbbb4wYMULhlF2pVIqXl5fCLEBjY2PGjh1LcHAw9vb2n/R6PofAwECsra25ceMGp0+fZsSIER+06qWiVWbfZ78gCIIgCMLXIAJigiB8doUKFeKXX36hadOmVKhQAVNTU1q2bEm/fv3Q1NREQ0ODdu3asWPHDmJiYti1axfGxsZMmTKF0qVLU79+fVasWMGzZ8/Q1tamT58+uabuxMTEMGvWLDp27MijR4+YM2cOu3btwsrKinnz5lGxYkVsbGxYuHDhNzMN72djZGTEuHHj8i1Qr6enR3JyMs2bN2fWrFmsWbOGLVu25Lui39dWvnx5fHx8UFFRwcHBAWdnZzp16kTv3r25efNmvsdWrlyZdu3aMW/ePLKysr7IeIcNG4aPj4+84HxesrKySE1N/eQZYmFhYQAKp6B+StnXV716dYX7lyxZQr9+/Rg4cCBbt25FTU0NqVTKwIEDKV68eJ510169epXnwh4AZmZmnzyI+Dls3LiRevXqUaJECQIDAz+49qJUKiU4ODjfoHXx4sU/dJiCIAiCIAifjQiICYLwRWhoaFCvXj169uxJ9+7dqV27tsIVzLS1tencuTOenp7ExMSwefNm9PT0GD16NEZGRjRt2pT9+/fTrl07hg8fTt++fRk3bhwrVqzg4cOHLF++nJSUFH755Rd69OhBXFwcy5YtY/PmzZQpU4bffvuNMmXK0KBBA1avXp3v9D3h09PU1MTKykrhPqlUyvbt2/n111+pUqUKnp6ejBw5kq5duzJ+/HhOnDhBWFgYFy5cICQkpMB1sL6E0qVL4+Pjg66uLg0aNGDChAlUqFABFxcXXr9+ne+xEyZMIDQ0lMOHD3+RsTo7O1OiRIl3FrTPnsb5OQJi+vr6CrNGP6UbN26gqqqaa8q2VCpl2rRpjBkzhkmTJrFq1Sp5kHbt2rV4e3vj5uaW51THd9WHK0j9uK8pLS1N/trZo0cPzp07l2NF0YKSSqUcOnQIGxsbO
nTokO+USVtb248ZsiAIgiAIwmchVpkUBOG78PLlS/bt28fOnTvx9vZGSUkJR0dHunTpQrt27XKt0hcZGcmePXvYtWsXFy5cQENDg1atWtGmTRtSU1PZt28fJ0+eBKBZs2a4urri7OyMnp7e17i8n0pmZiZeXl4EBQXJs0o0NDRITU1l5syZZGRkoKqqSrVq1QgJCSEzMxMTExMcHR1zZJpoamrSoUMHTExMvtal5BITE0PTpk2Jjo5mw4YN9OjRg3r16nHw4MF8V9nLnl7n5+f3RcY5bdo0Fi1aRERERJ6/87GxsRgaGuLh4UH79u0/2bkHDhxIUFAQV65c+WR9KjJ48GDOnz/P9evX5duysrIYPXo0y5cv5++//2bixInyfeHh4VSrVo2OHTuybt26PPvNyspi8eLFJCYmKtxfv359Gjdu/Oku5BOKjo6mU6dOXLp0ieXLlzNo0KD3niKZHQj7888/uXr1Kg4ODkyfPp26deuyb98+7ty5I2+rqqqKk5MTNWvW/MRXIgiCIAiC8PFEQEwQhO/Os2fP2Lt3L7t27cLX1xd1dXVatmxJ165dad26Ndra2jnaP378mN27d7Nr1y4CAwPR1tamTZs2tGzZkri4OPbs2YOvr688aObq6oqTk9N3Me3pexYfH094eDiqqqqUK1cOVVVVbt++TatWrYiKisLQ0JCIiAjU1NTo0aMHZcqUydWHqqoqI0aMQFdX9ytcgWIvX77E0dGRR48eMX36dEaNGsXUqVOZMWNGnsccPHiQdu3a4efnR7169T77GCMiIihbtixLly5l2LBhCttERkZSqlQpjhw5gpOT0yc7d5MmTTA0NGT37t2frE9F6tWrR7ly5di2bRsgW0mxf//+bNmyhVWrVvHLL7/I20qlUtq2bcuVK1e4desW+vr6+fZ96tQp/P39c23X0dHhl19++SYL6V+8eJEOHToglUrx8PDAzs7uvY6XSqUcPHiQP//8k6CgIBo0aMD06dNp2LBhjnbPnj0jPDwcdXV1TE1NP3mGoSAIgiAIwqciAmKCIHzXnj59Ks8Eu3z5MlpaWrRp04auXbvSokWLXDdj9+/fZ9euXezatYuQkBD09PRo3749tWrVIi4ujlu3bnH8+HHS09NxdnbG1dWVpk2bKpzeKfxfeno6SUlJ6OjooKKi8lF9vX79mu7du3P06FEGDRpEWloaZcuWzbN9o0aNcHBw+KhzfmqvX7+mRYsWhIaG4urqyurVq9m3bx/Ozs4K22dlZVG9enXMzMzYv3//Fxljx44duXXrFjdv3lSYJfTgwQNMTU05c+YMjRo1+mTnrVChAp07d+bvv//+ZH3+l1QqpXDhwkyaNInJkyeTkpKCq6srhw8fZsuWLXTt2jVH+x07dtCtW7d8f0bZ3rx5g729PYULF6Zt27bExcUBstphzZs3x8DA4HNd1gdzc3Nj+PDh2NjYsHfvXkqWLFngY6VSKQcOHGDGjBkEBQXRsGFDpk2blisQJgiCIAiC8L0RATFBEH4YYWFh8mDXtWvX0NPTw9nZma5duyoMat2+fZtdu3aRnp6Omppajn1SqZRdu3YRGhqKoaEhnTp1omvXrtSvXz/fqW8/m5SUFE6ePMn169flUx0tLS1p0qRJrsf0fWRmZvL7778zZ84cBg0ahJGRUZ5ta9So8c4gxteQkJBAq1atCAoKwsrKiqtXr3Lp0iWqVKmisP2GDRvo168ft2/fpnLlyp99fGfOnKFJkyZ4e3srDG7cvHmT6tWrc/78+ffOJspLeno6mpqarFixIkeG1qf08uVLrl69yvjx4/nzzz9p0qQJzs7OnD9/Hg8Pj1zZbs+fP6dq1ao0btyYXbt25du3VCqle/fuHDhwgPPnz2NhYcGbN29QVVX9qN/3zyU1NZURI0bg7u7O0KFDWbx4cYHHmR0I+/PPPwkODqZRo0ZMmzbtg4vvC4IgCIIgfGtEQEwQhB9SaGgou3btYseOHdy5cwcDAwM6d
OhA165dadCgAcrKyqSnp7N06VKSkpJyHZ+VlUVsbCy1atWS9/X48WNKlSpFly5dcHV1xcrK6r3r7/xIsrKy2LBhg8JVO01MTOjevftHPz67d+/mt99+o0ePHnm2cXBw+KQZTJ9SUlISbdu25eLFixQtWhR1dXUuX76cq+YdyIIXFSpUoGXLlqxdu/azj00qlVK1alWqV6/Onj17cu0PDAzE2tqaoKCgT1YDKiwsDBMTE06cOEGzZs0+SZ/ZMjMzOXz4MMHBwfJtSkpKhISEcOzYMQ4fPqwwk9DV1ZUTJ05w+/ZtihUrlu855s2bx8SJE9m9ezedOnX6pOP/1CIiIujQoQPBwcGsWrWKvn37Fui4rKwseSDs2rVrNG7cmGnTpn1zWZiCIAiCIAgfS6Q5CILwQ6pcuTLTpk3j9u3bBAcHM2jQIE6dOkWTJk0oVaoUI0aM4MCBAwqDYSC7kY6MjKRfv35s27YNZ2dn3NzccHZ2ZuvW7jGLbAABAABJREFUrdjY2GBmZsbvv//OrVu3vvDVfRtCQ0MVBsNANt3u4cOHH32Ozp07s2fPHl6+fKlwf2ZmJjt27CA+Pv6jz/U5aGtrywMxUVFRhIeH07NnT7KysnK1VVdXZ/To0WzZsoXIyMjPPjaJRMLQoUPZt2+fwvMlJyf/j737Doviets4/l16UbGgoNgQUUEUe0c0dpFiRQR7i11jSexRE429x95RwQoaOxZssYtdQCyINOm97c77x4b9SQArlviez3XtJc6ZnTmzYBLvPOc5QMHuMvn06VNAuWyyoPn6+uYIw0AZ7lSvXp09e/bkGegcOnQIDw8Pli9f/s4w7Pjx4/zyyy9MmTLlmw/DLly4QN26dXn16hUXLlx4rzBMoVBw4MABateuTZcuXShRogS+vr6cPn1ahGGCIAiCIHyXRCAmCMJ3TSaTYW1tzbx58wgKCuLq1au4urpy8OBBli1b9tb3Ojs7c/HiRbp06YKHhwdDhgzhyJEj9OnThzVr1mBjY8PKlSupXr061tbW/PHHHzx//vyLPNe3ICgo6JPG31etWrUYPXq0KqDJpqGhQalSpdi9ezfW1tZcuHChQO5X0HR1dfHy8qJ9+/ZkZGRw+PBh5syZk+e5Q4YMQUdHh+XLl3+RufXp0wcdHR3Wr1+faywtLQ2gQDeXePbsGWpqanlukPAp5HI5169fz3c8JiYm17G4uDiGDRtGx44dcXV1fev1AwIC6NmzJ3Z2dvl+774FkiSxatUqfvjhB6pVq8bNmzepX7/+W9+jUCjYv38/tWrVomvXrpQsWZLz58+LIEwQBEEQhO+eCMQEQfh/QyaT0aBBAxYvXkxwcDA//fTTW8/fs2cPp0+fZuzYsbx69YozZ87Qrl07tmzZwrBhw7h48SLDhg1jxYoVWFpaMmfOHExNTWncuDErVqwgPDz8Cz3Z1/Gu5ZApKSkFdq/KlSszc+ZMkpKSuHDhArGxsQwbNoyRI0dy9+5dTExMsLW1ZfLkyWRkZBTYfQuKtrY2+/bto3PnzqipqfHrr79y6NChXOcZGBjw448/snbtWlWz9s/JwMAANzc31q9fT2ZmZo6xz1UhVq5cuQLfpCI5OVkV4OUlKioq17GJEyeSmJjI2rVr3/qznJCQgKOjI8bGxri7u3+zPQRTU1Pp168fo0aNYuTIkZw6deqtVW8KhYJ9+/ZRq1YtunXrhpGRERcuXMDHxwcbG5svOHNBEARBEISv49v8rzpBEITPTE1NDUdHxzx7OYGy4iQsLIz58+djZmZGixYtCAwM5I8//iAsLIzjx4/TrFkz1qxZw+jRo7l37x7jxo1j8eLFlCxZkgkTJmBiYkLr1q3ZtGkTsbGxX/gJPz9zc/O3jo8fP56+ffty8+bNArmfrq4uCxcuxM3NjbVr19KpUyfCwsIwNTXF19eXuXPnsnjxYho0aMCDBw8K5J4FSVNTk127dql2OHR2dsbf3z/XeWPGjCEtLS3Pqq3PYcSIE
YSFheXa3TI7YCroQOxzLJfU09N76+6m//5zfvr0aTZu3MiCBQsoV65cvu9TKBS4uroSFhaGt7d3vv+8+NpevHhBs2bN2Lt3Lzt37mTp0qX5ho4KhYK9e/dibW1N9+7dMTY25uLFi5w6dYpmzZp94ZkLgiAIgiB8PSIQEwTh/y11dXV69uxJ4cKFcxxXU1Pj9evX3Lt3j5SUFCpUqEBkZCTDhg3D2NgYV1dXMjIyWLduHRERERw6dIhatWqxfPlyxo8fT0hICJMnT2bu3LmAchmckZERDg4O7N69O9++Zf81VapUyTfcMDMzY+jQofj6+lKvXj2aNm2q2tHzU/Xr14/z58/z4sUL6taty9WrV1FXV+eXX37h6tWrZGZmUrduXZYuXZpnr66vSUNDg+3bt9O7d2/S0tJo0aJFrv5nZcqUoXfv3ixbtoz09PTPPqcaNWpgY2PDn3/+meN4doVYQS6Z/FyBmIaGBjVr1sx3vE6dOqqvk5OTGTx4MLa2tgwZMuSt150xYwZHjhxh9+7dVK1atcDmW5DOnDlDvXr1iImJ4fLly/Tq1SvP87KDsJo1a9KjRw/KlCnDpUuXOHnyJE2bNv3CsxYEQRAEQfj6RCAmCML/a8bGxowaNYrOnTvTrFkzOnTowE8//cSaNWuIiIjgwIEDNGzYkJcvX6JQKDAyMuLixYs4ODhgYmLCxIkTKV26NDt27CAyMpJ9+/ZRuXJlFi5cyC+//EJSUhIzZsxg2rRpvH79ml69elGqVClcXFzw9vb+IoHH5yKTyejZsydNmjRRVRHp6enRvHlzXFxcmDhxIkFBQRw8eBBtbW169uxJxYoV+f3333n9+vUn3btBgwbcuHGDihUr0rx5c7Zs2QJA7dq1uXnzJsOGDeOnn36iTZs2vHz58pOftSCpq6uzdetWXFxcCA8Pp3nz5rmCuwkTJhAaGsquXbu+yJyGDx/OuXPnclTWpaWlIZPJCnR54+cKxADatm2b51wbNGiAlZWV6vfTpk0jPDycjRs3vnX54969e/n999+ZN28eHTp0+Cxz/hSSJLFkyRLatGlD7dq1uXHjRp67gSoUCvbs2aMKwsqWLcvly5c5ceIETZo0+fITfwtJkkhMTCzQ5daCIAiCIAj5kgRBEIR3SkxMlHbv3i05OjpKWlpaEiCVLVtWKlKkiARIFhYW0ty5c6Xg4GDV+bt27VKdL5PJJBsbG+nXX3+Vpk6dKtWsWVMCpKJFi0oDBgyQTp48KWVmZn7lp/x4CoVCSktLkxQKRb7n3L17VxoyZIikq6sraWtrS3379pVu3rz5SfdNS0uTBg0aJAHS6NGjpYyMDNWYj4+PVLZsWcnAwEDauXPnJ93nc1AoFJKDg4MESB06dMg17ujoKFlYWEhyufyzzyU9PV0yMjKSRowYoTq2bNkySU9Pr8DuERcXJwHS7t27C+yab0pJSZFMTEykwYMHSydOnJBOnz4thYaG5jjn8uXLkkwmkxYtWvTWa/n5+Ul6enpSz5493/oz/bUkJSVJPXv2lADp559/lrKysnKdk5WVJXl4eEiWlpYSILVr1066fPnyV5jt+7l79660YsUK6ddff5V+/fVXafPmzdKrV6++9rQEQRAEQfiOySRJkr5eHCcIgvDfExcXh7e3Nx4eHpw8eRJJkjAyMiImJobMzExatmxJ79696dq1K4ULF1ad7+npyalTp1AoFLRs2RIbGxsSExPx8vIiKCiIUqVK0aNHD1xcXGjcuPE7m9b/V8XExLBp0yZWrVpFcHAwTZs2ZfTo0XTu3PmjqpEkSWLNmjWMGTMGGxsb9uzZg6GhIQCxsbGMHDlS1btr9erVFC9evKAf6aNJkkTz5s25ePEiffv2ZevWraqxy5cv07RpUw4dOoS9vf1nn8v06dNZvnw5r169onDhwsyfP58FCxYQHR1dINf38/Ojdu3aXLlyhYYNGxbINd+0bNkyxo8fz6NHj6hSpUqu8fT0dGrXrk2hQoW4f
Plyvj3HoqKiqF+/PsWKFePixYvo6ekV+Fw/RVBQEJ07d+bp06ds2bKF7t275xiXy+Xs3buX2bNn8+jRI9q3b8/MmTNp1KjRV5rxu/n5+eHt7Z3ruKamJgMHDsTIyOgrzEoQBEEQhO+dCMQEQfguRUZGcu3aNV6/fo2+vj7W1tZUqVKlwEOmqKgoDhw4gIeHB2fPnkVdXZ2iRYsSExODjo4OnTt3pk+fPrRu3Rp1dXWio6M5cOAAnp6enD17FjU1NVq3bk3Dhg2JiorCy8uLV69eUaFCBZydnXFxccHa2vq7DMeysrI4fPgwK1as4Ny5c5iYmDB8+HAGDx5MyZIlP/h658+fp1u3bujp6eHl5ZVj+ZiHhwfDhg1DX1+fLVu20KZNmwJ8kk8jl8upXr06/v7+jBw5khUrVqi+382aNUMmk3HhwoXPPo+QkBAqVqzIypUrGTZsGLNmzWLDhg2EhIQUyPUPHDhA165diYyM/Kjv79skJydTqVIl7Ozs2Lx5c57nTJ8+nfnz53Pr1q0cSyjflJmZSbt27bh//z43btygfPnyBTrPT3X8+HFcXFwwNDTk4MGDOZ5DLpezZ88e5syZw6NHj+jQoQMzZ878LOFjQZLL5SxbtoykpKQ8xy0sLOjRo8cXnpUgCIIgCP8fiB5igiB8dx4/fsy6deu4efMmwcHBPHr0CA8PD44ePUpB/z8AQ0NDhgwZwpkzZwgNDWXp0qVUrVoVSZLIzMzk8OHDtG/fnrJlyzJhwgRevXrF4MGD8fHxITQ0lGXLlpGcnKwKH+rWrcv06dNVu1PWrl0bS0tLZs+eTWBgYIHO/WvT0NCgc+fOnD17ljt37tChQwfmzJlDuXLl6N+/P7dv3/6g6zVv3pwbN25QokQJmjRpgqenp2qsZ8+e3Lt3j2rVqtG2bVvGjBmjahr/tamrq3P16lUMDQ1ZtWoVEyZMUP2cTpo0iYsXL3L58uXPPo+yZcvi4ODA6tWrkSSJ1NTUAt9hslChQqrqvYK0evVqYmJimD59ep7jd+7c4Y8//mDq1Kn5hmGg3Bn1woUL7Nu375sKwyRJYu7cuXTs2JEmTZpw/fp11XPI5XJ2796NlZUVvXr1wtTUlKtXr3L06NFvPgwDeP36db5hGCgr4gRBEARBED4HUSEmCMJ3JSMjg6VLl5KWlpbnuJubG2ZmZp99HsHBwezZs4fdu3dz69YtNDQ0UFdXJz09nZo1a9KnTx969epF6dKlAWV1zt69e/H09OTq1avo6urSsWNHqlatyvPnzzl06BBJSUnUqVMHFxcXnJ2dKVeu3Gd/ji8tOjqajRs3snr1al6+fEmzZs1UyynzW+L2bykpKQwePJhdu3bxyy+/8Ntvv6Gurg4oG4yvWrWKn3/+GVNTU9zd3XPsQPg1PX78mFq1apGens6YMWNYunQpkiRhZWVFlSpV8PLy+uxzOH36NK1bt8bX15cDBw7g4+PD/fv3C+TaI0aM4OLFi9y5c6dArpctISEBU1NTevTowZo1a3KNZ2Vl0bBhQzIyMrh58yZaWlp5Xmfz5s0MHDiQP//8k2HDhqmOp6Sk8PjxY1JTUzE2NqZSpUpftGIzMTGRfv36ceDAAWbMmMHMmTNRU1NDLpfj6enJnDlzePz4MXZ2dsyYMYMGDRp8sbkVhIiICNauXZvvuLa2Nr/88ssXnJEgCIIgCP9fiEBMEITvyoMHD9i3b1++4zVr1qRz585fcEbw5MkTPD092b17Nw8ePEBDQ0O1q2Dr1q3p27cvTk5Oql5Fz549Y8+ePXh6enL79m0KFSqEnZ0dFStWJCAggKNHj5Keno6NjQ0uLi5069atwJegfW1ZWVkcOnSIFStW4OvrS9myZVXLKd+nwkj6Zwe+SZMm0a5dO3bt2kXRokVV4w8fPsTNzY179+4xa9YsJk2a9N6B2+fk7e2Nk5MTAMOGDWPVqlVs27aNAQMG8OjRI6pVq/ZZ7y9JEhYWFtSqVQsDAwNu3brF9
evXC+TaHTp0QFtbu8CDvTlz5vD777/z5MkTypYtm2t8/vz5TJkyhStXrlC/fv08r3HlyhVsbW3p168fa9euVQVeN2/e5Pjx42RlZanONTIywtnZmWLFihXoc+TF39+fzp07ExISwo4dO3B0dEQul+Ph4cGcOXPw9/enU6dOzJgxI99n+5aFhoYyd+5ctLS0MDAwyPMcKysrunbt+s5rpaamcv/+feLj4ylevDjVq1dHW1u7oKcsCIIgCMJ3RARigiB8V65fv87Ro0fzHY+MjCQlJQULCwssLS2xsLCgYsWKqgqiz+3Bgwd4enqyc+dOnj59ioaGBllZWejq6tKjRw/69u2Lra0tamrKFe0BAQF4enri6enJgwcPMDAwoFOnTpQuXZp79+7h4+MDKIM1FxcXOnfuTJEiRb7Is3wpd+7cYeXKlezcuRNJknB1dWXUqFE5eoTl5+TJk/Ts2RNDQ0O8vb2xsLBQjWVkZDBr1iz++OMPGjVqxPbt279I9eC7zJw5k9mzZyOTyejfvz8rV67E3NycDh06sHHjxs9+/+XLl7NhwwZ++OEHkpOTmTp1Kqampp9cFVW1alXs7OxYsmRJAc1UuWmCqakp/fr1Y9myZbnG/f39sba2ZtSoUSxcuDDPa4SGhlKvXj0qVarEmTNnVBVkQUFBuLu75/mekiVL8uOPP6r+nH4Ohw8fxs3NjTJlynDw4EEqV66sCsICAgLo1KkTM2fOpF69ep9tDp9LREQEf/zxB2vXrkVXV5effvoJuVye6zxtbW0GDRr0zhA8MDCQffv2kZGRoTqmp6eHs7PzN7X0VRAEQRCEb4sIxARB+K6EhISwadOmfMfj4uK4dOkSjx49UvWt0dHRoWrVqqqALPvXypUr57u86lNJkoSfnx8eHh64u7sTGhqKuro6crkcIyMj+vfvT58+fXIEOPfv31eFY4GBgRgaGtKxY0eKFy/OzZs3uXDhAtra2tjZ2eHi4oKdnR26urqfZf5fQ1RUlGo5ZUhICDY2NowePRonJ6e3VncFBQXh6OhIcHAw7u7uODg45Bi/dOkSvXv35vXr1yxbtowBAwZ81U0MFAoFjo6OnDlzhrS0NFxcXLCysmLmzJn4+/tTpEgRChcu/FE7cr5Leno627dvJzQ0NMdxS0tLunbt+tEBkFwuR09Pj8WLFzNy5MiCmCoA06ZNY8mSJTx9+hRjY+McYwqFAltbW8LDw7lz506eu0WmpaVha2tLaGgoN27cyLGbobu7+1v7V/Xs2ZOqVasW2LO8Oe/Zs2cza9YsnJyc2LRpE0eOHOG3334jICAAe3t7Zs6cSd26dQv83p9bVFQUCxYsYNWqVWhqavLTTz8xduxYDAwM8Pf359y5c4SHhyOTyahcuTKtW7emVKlSb71mQkICK1euzFHFl01XV5fRo0cXaC88QRAEQRC+HyIQEwThuyJJElu3biU4ODjXmJaWFsOHD8fAwABJkggJCeHRo0c8fPiQR48eqb6Ojo4GlE3fK1eunCMks7S0pGrVqnn+5fpT5nz16lVVOBYdHY1MJkOSJCwtLRk6dCguLi6qZZFvhmmenp68ePECY2NjOnTogJ6eHleuXOHmzZsUKlQIJycnXFxcaNOmzWcJUL6GrKwsvLy8WLFiBRcuXKBcuXIMHz78rZUkiYmJ9O3bl4MHDzJ79mymTp2aI9xJTExk3LhxbNq0CQcHBzZs2PDOv4h/TnFxcTRo0IC0tDTCwsJwdHREQ0ODatWqIZPJ0NDQoEaNGrRp06ZAQ8/jx49z9erVPMfatWtHo0aNPuq6L1++pHz58hw5coSOHTt+yhRVXr9+TaVKlRg2bBgLFizINb569WpGjhzJuXPnsLW1zTUuSRL9+/fH09OTCxcu5Kq0WrBgwVs3XmjevDktW7b89Ad5Q3x8PG5ubhw5coRff/2V8uXLM3fuXAIDA3FwcGDGjBn/ySAsJiaGxYsXs2LFCgDGjBnD+PHj81x2mp6ejrq6+
nsvYT5//jxnz57Nd9zOzu4/WUUnCIIgCMLnJwIxQRC+O8nJyezfv59nz56pjhUpUoQuXbpQoUKFd77/9evXqpDszV+zq2ZkMhkVK1bMFZRZWFjk2wfnfcnlci5evMjOnTvx8PAgMTERADU1NZo3b86IESPo1KmTquJBkiSuXbuGh4cHe/bsITQ0lHLlyqkCsPPnz/Po0SNKlChBt27d6NmzJ82bN/+sS72+JD8/P9VySplMplpOaW1tnetchULBb7/9xsyZM+nSpQvbtm2jUKFCOc7x9vZm8ODByGQyNm7ciL29/Zd6lFwePnxIw4YNqVOnDtbW1pQoUSLXOaVLl2bAgAEF0v9MoVCwYMEC0tPT8xwvVapUjmbzH+L8+fPY2toWaB+0iRMnsnbtWp49e5YrCH3x4gVWVla4ubnl2WgflEtDx44di7u7O66urrnGV6xYQWxsbL73v3jxIgYGBtja2mJra4uVldV7/7nKzMwkJiYGPT09ChcuDCi/305OTkRGRjJo0CC8vb158uQJjo6OzJgx45vZ/OFDxMfHs3TpUpYuXUpWVhYjR45k4sSJBbrT6P79+9+68UOjRo1o165dgd1PEARBEITvhwjEBEH4boWHhxMZGYm+vj6mpqafHALFx8fnqCTL/vX58+dk/6O0TJkyeQZlJUuW/OBleFlZWZw5c4atW7fi5eWlqlbR0dHB0dGRUaNG0aRJE9V1FQoFly5dwsPDg3379hEZGYmpqSktW7ZEkiTOnDnDixcvKFOmDM7Ozri4uFCvXr2vujywoERFRbFhwwZWr17Nq1evaN68OaNHj1ZVVr3J29sbNzc3KlasiJeXV66+YREREQwaNIi//vqLwYMHs2TJklzB2Zdy4MABli1bRqtWrfI9x97evkDCkoyMDObNm5fvuK6uLpMmTfqoa2/dupX+/fuTmppaIMvXwsLCMDMzY/z48cyZMyfHmCRJtG/fnkePHnH//v08e+qdPn2adu3aMW7cuHx7i72tWg6UoZavry9Xr14lMzOT4sWL07x5c1VAVrNmzVy9CRUKBefOnePq1auqflempqZoaGgwZMgQihUrhrq6Oi9evMDJyYkZM2ZQu3btD/14vrrExERWrFjBokWLSEtLY9iwYfz88885lqR+ioSEBI4fP463t7eqmjI/n1LZKAiCIAjC900EYoIgCJ8oJSUFf3//XGFZYGCgqq9N8eLFc4VklpaWlC1b9r0CqfT0dE6ePMn69es5ceIEmZmZquv27t2bUaNG5Qh2srKy8PX1xcPDgwMHDhATE0OVKlVo1qwZ6enp+Pj4EBERgZmZGT179sTFxYXq1at/ng/oC8rMzFQtp7x48SLlypVjxIgRDBo0KEeFVXY1TlRUFJ6enrRp0ybHdSRJYuPGjYwbNw4jIyPc3d1p3Ljxl34cQNlk/21hbpUqVXBxcfnk+0iSxPLly4mPj89zvEKFCvTr1++jrj1jxgw2bdrEq1evPmGG/zN69Gh27NjBs2fPcuweCv8L3/Jbnvn06VPq169PvXr1OHr0aJ4bakRHR9OlS5d8K/NatGihWoaZkpLClStX8PX1xdfXlytXrpCenk7RokWxsbHB1taWFi1aUKtWLXx8fLhy5Uqu68XHx7N9+3aio6Pp3LkzM2bMeK9NI741ycnJrF69mgULFpCYmMiQIUOYPHkyZcqU+eRrv3z5kkOHDnHo0CHOnj1LZmYm1tbWODo6oq6uTl7/OauhocHYsWPR19f/5PsLgiAIgvD9EYGYIAjCZ5KZmcmTJ09yLb98/PgxaWlpABQqVAgLC4tcQZmpqWm+O1+mpKTw119/sXr1ai5duqTanc3U1JShQ4eqKk3enIePjw8eHh54eXmRkJCAlZUV9evXJykpiVOnThEXF0eNGjVwcXGhZ8+emJqafv4P6DO7desWK1euZPfu3chkMtzc3Bg1ahQ1a9YElH26XFxcOHnyJAsWLOCnn37KFU4+efKE3r17c+3aN
aZMmcKMGTO+eC+2P//8k9evX+c7bmpqSp8+fQrkXlevXuX48eN5jn1KE3k3NzdevHjBhQsXPmV6gDIYqVy5MtOnT2fatGk5xsLDw7GwsKBTp07s2LEj13uTkpJo3LgxaWlpXLt2Lc8eVs+fP6d9+/ZER0dz4MABUlJSuH//PqmpqRgbG9OoUSOsrKzynV9aWhpXr15VBWSXL18mLS0NIyMjhg4dmm+4+erVK4YPH/7FgrDk5GTCw8PR0dGhTJkyn1Qpmpqayrp165g3bx4xMTEMHDiQqVOnUq5cuY++piRJ3LlzB29vbw4dOsStW7fQ0NDA1tYWR0dHHBwcVEvgb926xV9//ZUjFFNXV6dLly5YWlp+9BwEQRAEQfi+iUBMEAThC5PL5bx48SLPPmXZPcO0tbWpWrVqjqDMwsICc3NztLW1VddKTExk3759rFq1itu3byNJEjKZjFq1ajFu3DicnZ1z7JSZlpbGiRMn8PDw4PDhwyQnJ1O7dm2sra2JiYnBx8eHlJQUGjZsiIuLCz169KB06dJf/DMqSK9fv2bDhg38+eefvHr1ihYtWjB69Gjs7e2RyWRMnTqV+fPn4+bmxvr163M1qc/KyuKPP/5g1qxZWFtbs2PHjhy7f35uhw8f5tatW/mOF2Rzd0mSOH36NH///TcKhQJQ9sxr164dDRs2/OjrNmnSBHNzc7Zt2/bJc/zxxx/Zt28fz549U/Xfyta1a1cuXrzIw4cPc1V2KRQKunfvzsmTJ7ly5UqeFZG3b9+mY8eO6Onpcfz4cczNzT95vunp6Vy/fp2zZ8+qPtO8WFhY0KNHj0++37vI5XKOHz/OrVu3VPMpVqwY9vb2HxyEp6ens3HjRubOnUtERAR9+/Zl+vTpVKxY8aPmlr0MNTsECw4OpkiRInTo0AFHR0c6dOiQqyIwW1RUFLdv3yYhIYHixYtTu3btfM8VBEEQBEEAEYgJgiB8MyRJIjQ0NFdI9ujRI1WFkLq6OmZmZrmWX1arVo2MjAy2bdvG2rVr8ff3B0BTU5PmzZszZcoUWrZsmaMKxM/PT1UdFh4ezrVr19DT08PCwoKwsDDOnj1LVlYWLVq0wMXFha5du1K8ePGv8tkUhMzMTA4ePMiKFSu4dOkSFSpUUO1OefLkSQYMGIClpSUHDx7Ms7Llxo0bqkqnBQsWMGLEiC+yOcHr169Zt26dqhLwTZqamowaNSpXMPSpEhMTefHiBePHj8fU1JS1a9d+0vVKly7N0KFD+fXXXz/pOk+fPqVq1arMnTuXiRMn5hjbt28f3bt3Z8+ePXTv3j3Xe+fMmcOMGTPw8vLC0dEx1/ipU6fo0qUL1apV46+//iqwflfZgoOD2bJlS77jtWrVynNeBe2vv/7i5s2buY5raGgwePDg99pdNSMjg61bt/Lbb7/x6tUrXF1dmTFjBpUrV/7g+cTHx3Ps2DG8vb05duwY8fHxlCtXDgcHBxwdHbG1tc0R6guCIAiCIBQUEYgJgiD8B0RFReUIyEJDQ9HU1ESSJCIjI7l58yYGBgaqgKxcuXL4+fnh4+Oj6tukr6+PnZ0dM2fOJCoqirNnz+a6T1hYGJs3byYrK4vGjRtTqVIlXrx4waVLl1BXV6ddu3a4uLjg4ODw1RrNF4SbN2+qllOqq6vj5uZG69atmThxImlpaezbtw8LCwuuXbtGREQEhQoVolatWpQpU4bJkyezcuVK2rZty+bNmzExMfns8w0MDMTLy4uUlBTVseTkZG7dusWRI0dyVA0WpCFDhnD58uW37uL3LikpKejr67Nt27ZPXtrZv39/jh07xtOnT9HT01Mdj4mJwcLCgiZNmnDgwIFcy/+8vb1xcnJi9uzZTJ8+Pdd1d+zYwYABA2jTpg179uz5LD/bZ86c4dixY/le29XV9aMCpQ+RmJjI0qVL8+y3Be8O5bKystixYwezZ8/mxYsXODs7M3PmzA/eOTQ4OJhDhw7h7e3NuXPnyMrKonbt2qqlk
LVq1fouNvsQBEEQBOHbJgIxQRCE/xCFQoG3tzd3797NNZaens79+/d5+PAhT58+Vf2l19DQEC0tLWJiYkhLS8PAwIAxY8bkW93Uu3dvfH198fT0xMfHB4CmTZtStmxZAgMDuX79Orq6utjb2+Pi4kKHDh0+WyDzuUVGRrJ+/Xr+/PNPwsLCaNq0KfHx8WRkZNCrV69c5zdq1Ih27dpx4sQJ+vfvT1paGmvXrv0iS90yMzMJCAhg9+7d7N+/n9GjRzN27FiGDBnCypUrP8s9t23bRr9+/YiNjf3o5WcPHjzAysqKCxcu0KxZs4+ei7+/P5aWlixdupTRo0fnGOvbty+HDh3i4cOHuZb4PnjwgEaNGtG2bVv27t2b4+dekiTmz5/P5MmTGTBgAGvXri3wHnFJSUn8/PPP/Pnnnzg6OlKvXr1c1X61atXCwcHhs4dAjx8/xtPTM9/x4sWLM2rUqFzH5XI5u3btYvbs2Tx58oSuXbvy66+/vrWX2pskScLPzw9vb2+8vb3x8/NDQ0ODli1b4ujoiL29PeXLl//o5xIEQRAEQfgYIhATBEH4D7lx4wZHjhzJc0xTU5MxY8agr69PamoqAQEBuZZf+vv7U6dOHezs7PK9R+vWrWnatCmgXK534MABPD09OXfuHBoaGjRr1gxDQ0MeP37MvXv3MDAwoEuXLvTs2ZMffvgBDQ2Nz/Lsn1NmZib79+9nxYoVXLt2jXHjxuVbyTNgwADKlStHdHQ0w4YNY+/evbi6urJq1aov0rNILpdjZ2fH9evXGTt2LDNmzGDv3r1069atwO/15MkTzM3NOXr0KB06dPioaxw+fBgHBwdevXr1SbsN9urViwsXLhAYGIiOjo7q+LFjx+jYsSObN2+mf//+Od4TGxtLgwYN0NHR4e+//87xPZXL5YwZM4bVq1czffp0Zs2aVeCB1NmzZxkwYACRkZH88ccfjBgxgpiYGK5cuUJ4eDh6enpYW1tjaWn5RSqinj59mudmA9mMjY0ZOnSo6vcKhYK9e/fy66+/8vjxYxwcHJg1a9Z7Nf7PyMjI0Q/s5cuXGBgY0LFjRxwdHWnfvj0GBgYF8ViCIAiCIAgfRQRigiAI/yFr164lIiIi3/F27drRqFGjfMczMzM5evQofn5++Z5TpEgR3NzcKFmyZI7jYWFh7Nu3D09PTy5duoS2tjbNmjWjaNGi+Pn5ERQURKlSpejevTsuLi40btw4RzWOXC7n1q1bPHjwgIyMDMqWLUvjxo3z3Onvazp+/DhXr17Nd7xBgwaqcEiSJHbt2sWIESMoUqQI27ZtK7AG928TExND/fr1KVy4MGZmZvj4+HD79m0qVapUoPeRJAljY2MGDx7Mb7/99lHXWLFiBZMmTSIlJeWje67dv3+fmjVrsmbNmhyBTfaOqdWqVePEiRM5QqWsrCzs7Oy4ceMG169fz/HZpKam4urqire3N2vWrGHIkCEfNa/8JCUlMWnSJNasWYOtrS2bNm3CzMysQO/xoeRyOevWreP58+fo6+vnec4PP/yAjY0NCoUCLy8vZs6cyf379+nQoQOzZ8+mXr16b71HXFxcjn5gCQkJlC9fHkdHRxwdHbGxsRH9wARBEARB+GZ8/m7AgiAIQoGJiYl563h0dPRbxzU1NWnQoMFbz1m2bBnGxsbY2tqydOlSnj59Cigbo48aNYqLFy/y4sULfv/9dxISEti/fz+hoaG0adOGRo0acfDgQZo1a4apqSmTJk3i9u3bZGVlsWvXLo4ePcqLFy8ICwvj+vXrrFu3jtDQ0A/7ED6zdy3dSktLU30tk8lwdXXl7t27mJmZ8cMPPzB+/Pgc53wOxYsX5+DBgwQGBqKuro6hoSE9evQgPT29QO8jk8lo2rQply5d+uhrPH36FFNT00/agGDmzJlUqFAhVwXY5MmTiYmJYf369bkqrCZPnszp06fZs2dPjjAsOjqa1q1bc/z4cby8vAo8DDtz5gw1atRg27ZtrFy5k
jNnznz1MOz69es0atSIESNGkJqammc1mrGxMQ0aNODw4cPUrVuXrl27YmxszOXLlzl69Gi+YdiLFy9YsWIFrVu3pmTJkvTq1YvAwEDGjx+Pn58fz58/Z8WKFbRq1UqEYYIgCIIgfFNEICYIgvAf8q7dBNevX8/48eO5cOFCnrsSgjLYql69ep5jVapU4dq1a6xfv54iRYowefJkzMzMsLa2ZubMmdy+fRtJkihfvjzjx4/n2rVrPHnyhOnTpxMZGcmhQ4dISEigbdu2VK9enc2bN1OnTh0cHR1Vwdqb0tPT810C+rW8q0l+QkJCrmPly5fn9OnTLF68mFWrVlGvXj3u3LnzuaYIQM2aNdmyZQt79+7FycmJe/fu5dp5sSA0bdqUq1evkpmZ+VHvf/r06SdVrt26dYsDBw4wY8aMHIHK+fPn+fPPP5k3bx4VK1bM8Z6dO3eyaNEiFi9eTKtWrVTHX7x4QbNmzQgICODs2bPY29t/9Lz+LTExkeHDh9OqVSsqVKjAvXv3GDly5BfZiTQ/MTEx/PjjjzRs2JCsrCwuX77MypUr6d+/P1WrVqVQoUIYGhrSvHlzypYti42NDQ4ODhgYGODr68upU6do3LhxjmtKksTNmzeZMWMGtWrVomLFikyYMAF1dXWWL1/Oy5cvVePW1taiOb4gCIIgCN8ssWRSEAThP+TChQucOXMmzzGZTEZ4eDgHDx4kPDwcQ0ND7O3tcXR0pE2bNjl25ZPL5Zw7d44bN26QlpaGtrY2derUoWXLljmaiiclJXHixAm8vLz466+/iIuLo3z58jg5OeHk5ISNjU2OnmHZTbs9PDx4/PgxRYsWpWHDhlhYWLy1v9aYMWO+SP+t95Xd+PvfUlNTWbp0KZ06dWLp0qV5VpPdu3cPNzc3Hj16xG+//cb48eNRV1f/bHP9+eefWbx4McOHD2flypXs37+fLl26FNj1//77b5o0acK1a9eoX7/+B7/fysqKli1bfnTj/06dOhEQEMDDhw9VP2upqalYW1tTqlQpzp8/nyN0unHjBjY2Njg7O7NlyxZVIOPn50fHjh3R1dXl+PHjmJubf9R88nLmzBkGDBhAVFQU8+fPZ9iwYV81CFMoFGzbto1JkyaRkZHBnDlzGD58eK7+fpIkcebMGWbMmMHly5dp0qQJc+bMoWXLljmCrPT0dM6dO6fqB/bq1SuKFi2aox9YkSJFvvRjCoIgCIIgfBIRiAmCIPyHZGVlsXPnTp4/f55rrGPHjtSvXx+FQsG1a9fw8vLC29ubx48fo6urS7t27XB0dKRTp04YGhoCyr84p6amoqOj887QJjMzk/Pnz+Pl5YWXlxchISEUL16cTp064eTkRNu2bVW9iSRJ4t69e3h6euLp6UmLFi0oV65cvtd2dXWlcuXKH//BFLCsrCxOnTrFzZs3VZV2FStWxN7enhMnTjB+/Hji4uKYNm0a48ePz7XLZnp6OjNmzGDhwoU0a9aM7du356piKihyuZwOHTpw69YtGjRowOXLl7l161aB9RNLT0/HwMCAP/74g7Fjx37QeyVJolChQvz222+MGzfug+995coVGjduzM6dO3Ps+jlp0iRWrFiBn58f1apVUx2PiIigXr16lClTBl9fX1XzfR8fH7p06UKVKlU4cuQIRkZGHzyXvCQmJjJp0iTWrl1LixYt2LRpU4H3cftQd+7cYfjw4Vy+fBlXV1cWLlyYa+dNUFbYTZ8+nfPnz1O/fn3mzJlD27ZtVUFYbGwsR48exdvbm+PHj5OYmEjFihVxdHTEwcEBGxubAt+RUxAEQRAE4UsSgZggCMJ/jFwu5/bt29y7d4/U1FSMjIxo0KBBvoGTv78/3t7eeHl5ceXKFWQyGc2aNcPJyQlHR8eP+gt89rKp7HDswYMH6Orq0rZtW5ycnHKEbpIk4e7unueSSYDk5GRWrFhB+/btcXV1pVOnTujq6n7wnD6HtLQ0oqOj0dfXz1HBlpCQwOzZs1m2b
BmVKlVi5cqVtGvXLtf7z58/T58+fYiJiWHlypX06dPnsywhi46Opl69ehQpUoSEhAQMDQ25ePFirqDuY9nY2GBsbMzevXs/6H0REREYGxtz8OBBnJycPvi+bdu2JTQ0lDt37qgC2xs3btCwYUN+++03Jk+erDo3IyODH374gaCgIG7evKna0dLd3Z3+/fvTunVr9u7dm+/uoR/q9OnTDBw4kKioKBYsWMCPP/74VavCEhISmDlzJitXrqRKlSqsXr06zw0e/v77b2bMmIGPjw+1a9dm9uzZ2NnZIZPJePbsGYcOHcLb25vz588jl8upV6+eKgSrUaOGWAIpCIIgCML3QxIEQRC+SQqFQkpKSpLS09ML7JphYWHS+vXrpY4dO0paWloSINWoUUOaPn26dPPmTUmhUHzUdQMCAqSFCxdKTZs2lWQymaSmpibZ2tpKS5culZ4+fSrFx8dL8+fPl3799ddcr7Zt20o//PCDZGlpKQFS4cKFpb59+0onT56UMjMzC+zZP4f79+9LLVq0kACpS5cu0osXL3KdExcXJ/Xt21d1zuvXrz/LXPz8/CRdXV2pQ4cOkqampjRmzJgCu/bPP/8slSlT5oN/Pi5fviwB0p07dz74nr6+vhIg7du3T3UsPT1dqlGjhlS7dm0pIyNDdVyhUEiDBw+WtLS0pMuXL6uO/fHHHxIg9evXL8f5nyIhIUEaOnSoBEgtW7aUnj59WiDX/VgKhULatWuXVLp0aUlPT0+aP39+nv/MuHbtmtS+fXsJkKysrKQDBw5IWVlZ0vXr16Vp06ZJNWrUkABJS0tLat++vbRmzRopJCTkKzyRIAiCIAjClyEqxARBEL5Bt2/f5vz588TFxSGTyahWrRrt2rXDwMCgwO6RmJjIiRMn8Pb2VvUHK1euHI6Ojjg6OmJra/tRS6IiIiI4fPgwXl5enDp1ioyMDKytrXFycsLQ0FC1E6a+vj6lS5fm4sWL7Nu3j6ioKCpUqED58uV58eIFwcHBGBkZ0bNnT1xdXalXr943WZ0iSRIeHh6qZZTTp0/np59+ylWdtX//foYOHYqGhgabN2+mY8eOBT6X3bt306tXL7p27cr+/fs5cOAAnTt3/uTrHj58GAcHB549e/ZBSz937tyJm5sbCQkJ79wQ4k2SJGFra0tiYiI3b95UVV7Nnj2b2bNnc+PGDWrVqqU6f82aNQwfPpzNmzfTv39/5HI5Y8eOZdWqVUyfPp1Zs2YVyM+Oj48PAwcOJDo6moULFzJ06NCvWhX26NEjRowYwdmzZ+natStLly7NVSnq5+fHjBkzOHz4MBYWFkydOpVixYpx+PBhDh06RGhoKMWKFcPOzg5HR0fatWv3Qd8rQRAEQRCE/yoRiAmCIHxjrl+/ztGjR3MdNzAwYOjQoZ9lOWF2f7DspZUvX75UNc12cnKiffv2H/WX5OzQLbspf3x8PNWqVaNDhw7Y29urmvJnZWVx9uxZPDw8OHDgAHFxcZiammJkZMSTJ0+IiorC3NycXr164erqWqAN0QvK+yyjDAsLY+DAgRw7doxhw4axcOFCVd+1gjJhwgSWLVtG06ZNuXPnDrdv38bU1PSTrhkdHY2hoSHu7u64urq+9/vmzJnDypUriYyM/KD7nTp1irZt23L48GE6deoEwIMHD6hduzYTJ07k999/V517/vx5WrVqxbBhw1ixYgWpqam4ubnh5eXFn3/+ydChQz/o3nlJSEhg4sSJrF+/npYtW7Jp06ZP/kw/RXJyMnPmzOHAgQO0aNGCihUrUrx4caytrWnQoAHq6urcv3+fmTNncuDAAczMzGjTpg2vX7/mxIkTJCUlYWpqqgq/mzVrlqvhfl4yMjKIjIxEW1sbQ0PDbzKgFgRBEARBeF8iEBMEQfiGZGVlsWTJElJTU/Mcb9myJc2bN/+sc5AkCT8/P1V/sLt376KlpUWrVq1wcnLCwcEBY2PjD75uRkYGvr6+qmb/r169onjx4tjb29O5c
2fVTpgZGRmcPHkST09PvL29SUxMxMzMDAMDA/z9/UlOTqZ+/fr06tWLnj17ftRcPqcHDx4wcuRIzp07R5cuXXLtRilJEmvXrmX8+PGULVuWHTt20LBhwwK7f1ZWFu3atePu3bvo6upibGzMxYsX0dLS+qTrWlhY0LJlS/7888/3fs+AAQN48OABV69efe/3SJJE48aNAWW/K5lMhlwup2nTpsTHx3P79m1Vs/zg4GDq1auHlZUVJ06cIDExEUdHR27evImHhwcODg4f9pB5OHXqFIMGDfomqsIkScLLy4uxY8dSsmRJ7O3tc51TpkwZTp8+zZ49eyhWrBglS5YkMDAQhUJBgwYNcHBwwNHRkerVq793oKVQKDh37hxXr14lIyMDgFKlSmFnZ5fnTquCIAiCIAj/BSIQEwRB+IaEhoayYcOGfMcNDAxwcnKiXLly79wVsqA8e/YMb29vVaNtSZJo2LChqin/m7v8vS+FQpGjKf/Dhw9VO2FmN+UvUaIEqampHD9+HA8PDw4fPkxqaipmZmbo6Ojg7++PQqGgVatWuLq60rlzZ4oUKfIZPoEP9z7LKP39/enduze3bt1i2rRpTJ06tcB27YuKiqJevXro6ekRGBjIyJEjWbp06Sddc+DAgdy4cYM7d+6893tatGhB6dKl2b1793u/58iRI3Tq1ImTJ0/Spk0bAJYsWcKECRO4ePEiTZo0ASAlJYVmzZoRGxvL9evXSU5OpkOHDkRGRvLXX3/RqFGjD3vAf0lISGDChAls2LCBH374gU2bNn22nULfR1BQEKNGjeLYsWPY29vTuHFjVTj1b3v27OHhw4doaWnRunVrHBwcsLe3V2008KFOnTrF5cuXcx3X1NRk8ODBlCxZ8qOuKwiCIAiC8DWJQEwQBOEbEhERwdq1a/MdDwgIYNeuXWhpaWFmZoa5uTnm5uZUqVJF9XWZMmU+WwVLVFQUR44cwdvbm+PHj5OamkrVqlVxdHTEycmJhg0bftS9AwICVMs1s6uCmjdvrgrdKlasSHJyMn/99RceHh4cO3aM9PR0zMzMkMlkPHnyBB0dHezt7XF1daV9+/YFtsPip3jXMsrMzEx+//13fvvtN+rWrcuOHTuoUqVKgdz79u3bNGnShJo1a3Lt2rWP3ukx2+bNmxk0aBCxsbHv3cuufPny9O7dO8cSx7eRJIm6detSqFAhfH19Vd/bmjVrMnjwYJYvX05cXBx///0358+fJzo6mrZt21KpUiU6deqEjo4Ox48f/+TP8OTJk6pnXbRoEUOGDPlqywPT0tL4448/+OOPPzAyMmLFihVYWlqya9eufN8THx9P06ZNadeu3SfvqpmSksKSJUuQy+V5jteqVQtHR8dPuocgCIIgCMLXIAIxQRCEb4gkSaxatYqYmJg8x7OX1gUGBhIQEEBgYCCBgYE8f/4chUIBgK6uLpUrV1YFZG+GZkZGRgX2F/vU1FR8fHzw8vLi0KFDREVFYWRkhIODA05OTvzwww+qpW0fIjw8XNWU38fHh4yMDGrVqoWTkxNOTk7UrFmTxMREvL298fDw4OTJkygUCkxNTUlPTyckJIRixYrRrVs3XF1dsbGx+aqNz+HdyyivXr1K7969CQkJYfHixfz4448F8n1yd3end+/e1KxZk+DgYG7fvv3RVU7+/v5Uq1aN48eP5+qNlpf09HR0dXXZsGEDAwcOfK97HDhwgK5du+Lr60vz5s2RJIkffviB58+fc+/ePZKTk9m6dStpaWk53hcaGsrt27c5dOjQJy2hfbMqrFWrVmzcuPGrVoUdO3aMUaNGERwczIQJE5g6dSr6+vqcPn2aixcv5vu+mjVrFshmCgB37tzBy8sr3/FixYoxevToArmXIAiCIAjClyQCMUEQhG9MUFAQu3fvzlWRUaFCBXr37p3nUsmMjAyePXumCsjefAUHB5P9j/pChQrlCsqyX5/SJFsul/P333+rlkAGBQVRqFAh2rdvj5OTEx07d
qRYsWIffN2EhASOHz+Ol5cXR44cISEhgYoVK6rCsaZNm5KQkMDBgwfx8PDgzJkzyGQyKlSoQHx8PNHR0ZQtWxYXFxdcXV2pWbPmV6v0edcyyuTkZCZMmMDatWvp0KEDmzZtonTp0p9833HjxrFq1SoMDQ0pX748Fy5c+Kh+YpIkUapUKYYNG8bs2bPfeX5AQABVq1blzJkztGzZ8p3ny+VyrK2tKV26NKdOnQJg/fr1DB06lFOnTtG6dWu2bt3Kixcv8nx/8+bN3+s++Tlx4gSDBw/+JqrCgoODGTt2LAcPHuSHH35g9erVGBgY4OHhgbu7O48ePWL8+PH5NsJ3dHTMsQvn+5AkieDgYPz8/Lhz5w537tzBz88PSZLo27dvvu8rWbIkw4cP/6B7CYIgCIIgfAtEICYIgvANCgsL49KlS4SEhKCjo0ONGjVo0KDBR/WYSktLIygoKM+w7NWrV6rzDAwMciy9fPP1IWGWJEk8fPhQ1Tz/+vXraGhoYGtrq9rV7mMacWdkZHDu3DnVdUNDQzE0NMTe3h4nJyfatGlDYmIi+/fvx8PDgwsXLqChoYGJiQlRUVEkJSVhaWmJq6srvXr1+mqVP+9aRnn06FEGDBhAVlYW69evp0uXLp90v8zMTNq2bcvdu3dJSEhg9OjRLF68+KOu5ejoSHJyMj4+Pu8898SJE7Rv355nz56912e9e/duevXqxd9//02jRo0ICQnB0tKSHj16sHHjRpKTk1m0aFG+7zcxMWHQoEEf8jiAcnnhhAkT2LhxI61bt2bjxo1UqFDhg69TEDIyMli6dCmzZ8/GwMCAuXPnoqamxs6dO/Hx8UFDQwN7e3vc3NwoXLhwnlVixsbGDBw48K27RqalpfHgwYMcwdfdu3eJi4sDoHjx4tSqVQtra2tq1qxJWFhYvv3KbGxs+OGHHwrk+QVBEARBEL4kEYgJgiD8P5acnExQUFCO5ZfZr4iICNV5JUqUyLNfmbm5OYULF37rPV69esWhQ4fw8vLizJkzZGVlUadOHVXfsRo1anxwJY5CoeDGjRuqirRHjx6hp6enaspvZ2dHWloa+/btw8PDgytXrqClpYWRkRERERFkZGTQpEkTXF1d6dGjB4aGhh/1+X2K+/fvM3LkSHx9fenatStLlixRBYVRUVEMGTKEgwcP0rdvX1asWPFJGwa8fv2aunXrIpPJCA4OxsvL66P6Pi1YsIDZs2cTFxf31sAFYM2aNYwePZrU1NR3npuVlUX16tUxNzfnr7/+QpIk7O3tuXXrFg8fPqRo0aK8fPmSzZs353uN7Oq1D3HixAkGDRpEXFwcixcvZvDgwV+tKuzs2bOMGDGCgIAA7O3t0dTU5MiRI6SkpGBra4ubmxtdu3ZVhdOSJHH9+nUuX75MfHw8Ghoa1KhRgzZt2qCrq6u6bkRERI7g686dOzx+/Bi5XI5MJsPc3Bxra2usra1VIZiJiUmOz+HevXscOHAg15xLlCjBwIEDc9xPEARBEAThv0IEYoIgCEKeEhISePLkiSogezM0i46OVp1nZGSUZ7+yypUro6enl+Oa8fHxHDt2DC8vL44ePUpiYiKmpqaqcKxp06bvDE8kSSIqKgq5XI6hoSEaGhr4+/vnaMqvrq6eoyk/KHfe8/Dw4NatW2hra1O8eHEiIiKQyWS0b9+eXr164ejoiL6+fsF/mG95luxllPHx8UybNk21jFKSJLZt28bo0aMpXrw427dvp3nz5h99r5s3b9K0aVOMjIyIj4/nzp07H1wJdfHiRWxsbLh58yZ16tR567kTJ07k4MGDPHny5J3X3bp1K/3791ddd9euXbi6uqqCO4VCgaOjI+bm5vk29K9Xrx52dnbv9Rzx8fGMHz+eTZs20aZNGzZs2PDVqsLCwsIYP348u3fvpnTp0qSnpxMTE4OlpSW9e/fGxcXlrXOTJIm0tDTU1NQICgrKEXzduXOH8PBwAPT19alZs2aO4KtGjRrv/fP+9OlTL
l68SEhICNra2lSvXp3mzZvn+jMuCIIgCILwXyECMUEQBOGDxcbG5rkEMyAggPj4eNV5JiYmeS7BzN4dMnsJ5KFDhwgNDaVEiRJ06tQJJycn2rZtm+sv2w8fPsTHx4fY2FhAuYFA06ZNadKkiaqiJSwsTFWRdvr0aTIzM6ldu7aq75iOjg579+7Fw8OD+/fvo6urS6FChXj9+jV6enp07tyZXr160aZNm49aovox3raM8tmzZ/Tp04dLly4xceJEZs+e/dE7aG7fvp2+fftSvHhxzM3NOX/+/Af1E0tLS8PAwIBFixYxatSot57btWtXEhMTOXny5FvPy8zMpGrVqtSuXZv9+/cTGRmJpaUlrVu3xsPDA4Bp06Yxd+5c5s2bl6uhPoC2tjZDhgyhePHi73yG48ePM3jwYOLj41m8eDGDBg36KlVhWVlZzJo1i4ULF5KVlYVcLsfY2BhXV1fc3NywtrbOd17x8fHcvXs3R/B1//591WdTrly5HMGXtbU1ZmZmX31zCUEQBEEQhG+JCMQEQRCEApNdvZVXWBYYGEhSUhIAMpmM8uXLqwKyypUrA8pG7OfPn+fRo0fo6OjQtm1bHB0dsbe35/Xr1+zduzfP+zZr1oxWrVrlOp6QkKCqSDty5AiJiYlUqlRJFY4VLVpUtawyICAAPT09tLS0iIuLw9DQEGdnZ1xdXWnUqNEXCU3yW0Ypl8tZtGgR06dPx9LSEnd3d6ysrD7qHqNHj+bPP/8EYOzYsW/ty5WXJk2aUL58eVVYlZ86depQv3591q1b99bz1q9fz48//sjdu3exsrKiZ8+e+Pj48PDhQ0qVKsWePXtwdnZmwoQJbNmyhZYtW9KkSRMSEhIAZfjToUOHd25AEBcXx/jx49m8eTNt2rRh48aNH9XL7n0pFAqCgoIICwtDT08PCwsL9PX1iY6OZt68eaxZs4aUlBQ0NDTo1q0bAwcOpGXLljk2zZAkiefPn+dqdP/8+XMAtLS0sLS0zBF8WVtbv1cwKAiCIAiC8P+dCMQEQRCEL0KSJCIiIvLsV/bkyRNSU1MBUFdXp0yZMujq6pKYmEhYWBgymYwJEybku7xLTU2Nn3766a3Lv9LT03M05Q8LC8PQ0BAHBwccHR0pVaoU3t7eeHp68uzZM/T09JDJZCQnJ1OxYkVVM35LS8vP8vlke9sySj8/P9zc3AgMDGTevHmMHTv2g6t+MjMzad26Nbdv3yYxMZFDhw5hb2//3u+fOHEiHh4evHz58q3PULRoUaZMmcLPP/+c73lpaWmYm5tjY2PDrl278Pb2xsnJCXd3d1xdXblz5w5NmjShbdu23L59Gz09PS5evEixYsWIi4tDU1OTQoUKvXPOx44dY/DgwSQkJLBkyRIGDhz4WQPOpKQkdu3aRVhYmOqYTCYjICCAXbt2IUkSBgYGTJw4kXHjxqGnp0dqair379/P1eg+O/gzNDTMEXzVqlWLatWqfbEqRkEQBEEQhO+NCMQEQRCEr06hUBAaGppnv7KgoCAKFy78ziV6Xbp0oUaNGu99v+vXr+Pl5cXBgwfx9/dHT0+P9u3b4+joSOnSpTl+/Dienp68evUKfX19srKySE9Px9raGjc3N1xcXDAxMSmIx8/Tv5dRrlq1irZt25KWlsbUqVNZsmQJLVu2ZOvWrR9c6RQREUG9evVITk5GkiTu3Lnz3tfw8vKic+fOvHjxIt/3xMTEUKJECfbs2UP37t3zvdbKlSsZO3YsDx8+xMjICEtLS+rUqcPhw4eJjo6mXr16qp5h0dHRXL58+YOeNS4ujp9++oktW7bQtm1bNmzY8FmrwrLt2LGDp0+f5jquUCjYvHkz/fr1o1GjRty/f19V/eXv749CoUBNTY0qVarkanRfunTpr9bwXxAEQRAE4XskAjFBEAThmyaXy7l79y6HDh1663mpqak0bdoUGxsbihYt+kH3ePz4sWrHyqtXr6Kuro6trS0ODg6ULVuWc+fOsXfvXiIiI
tDX1yctLQ25XJ7n7n8FLb9llGfOnKFv374kJiayevVqevXq9UGByfXr17GxsUFTUxMrKyvOnz//XtVGkZGRGBkZsWvXLlxcXPI858aNG9SvX58bN25Qt27dPM9JSUmhUqVKtG/fnq1btzJo0CD27NmjCsfatm3LgwcPqFKlCvfv3+fChQvvHXgCHD16lCFDhpCYmMiSJUsYMGDAFwmU/v7777f2TfPz88PLywuAwoUL52p0b2VlJRrVC4IgCIIgfAEiEBMEQfiOZWZm8ujRI16/fk3hwoX/s3/ZlsvlLF26lOTk5HzP8fT05NGjR8hkMqytrbG1tcXW1pbmzZtTokSJ975XaGioqin/mTNnyMzMpE6dOjg6OmJiYsLVq1fZv38/MTEx6OnpkZKSgqamJnZ2dri6utKpUyd0dHQK4rFV8ltGmZqaysiRI9m5cyc9evRgzZo1H9Q/asuWLQwYMEC15HThwoXv9b4qVarQrl07Vq5cmed4dt+vmJiYfIPCRYsWMXnyZPz9/Xn27BmtW7dm7dq1DB06lNGjR7NmzRpatGjB+fPnOXHiBC1atHivucXFxTFu3Di2bt1Ku3bt2LBhA+XKlXuv936skJAQdu/ejbu7O8nJyfTu3TvfcyVJUoVgpqamotG9IAiCIAjCVyICMUEQhO9UZGQkO3fuVPUgAmUT7i5dulC1atWvOLOP87bKmxo1atC5c2eePXuGr6+v6pXdfNzKygpbW1tatGhB8+bNKVWq1HvdMz4+XtWU/+jRoyQmJmJmZoa9vT1ly5bl3r17HDx4kISEBHR1dUlNTUVfX5/u3bvj6uqaq0n6p0pISGDWrFksX748xzJKT09Phg0bhq6urmp54PsaMWIE69atQy6Xc/jwYTp16vTO9/Tr14+7d+9y69atPMfnz5/PvHnziIuLy3M8e3ODLl26sGTJEmrUqEGFChU4ffo0W7ZsYdCgQbRu3ZrTp0/j6en51mWXbzp69CiDBw8mKSnps1eFxcfHc+DAAdzd3Tl79iyamppUrVoVmUxGly5d8n1fw4YNad++/WeZkyAIgiAIgvD+RCAmCILwHVIoFPz5559ER0fnGtPQ0GD06NEULlz4K8zs40mShK+vLxcvXkQul6uO16hRA3t7+zyX+wUHB+cIyJ48eQKAhYWFqoLM1tb2nTsUgrIp/9mzZ1VN+cPDwylZsiR2dnaULVuWwMBADh8+TEpKClpaWmRkZFCyZEnc3Nzo1asXdevWLbBwJq9llOrq6vTv359Tp04xcuRI5s+f/17VgBkZGbRq1Yrr16+jq6vL3bt331lRtWHDBn788Ufi4uLy/DkaOnQo169fzzcw+/3335k9ezZPnjxh8eLFrF+/nnv37hEZGYmtrS316tXj77//Zvny5YwePfqdz/BmVVj79u1Zv379Z6kKy8jI4MSJE7i7u3Po0CHS0tKoXLkyWVlZPH/+HD09Pezt7alfv75qR9U3yWQyhg0bRsmSJQt8boIgCIIgCMKHEYGYIAjCd+jp06fs2LEj3/FWrVrRrFmzLzijgpOSksLTp0+Ry+WUL1/+g3p3vXr1ivPnz+Pr68u5c+fw9/cHwNzcPEdA9q4wRaFQcPXqVVVT/sDAQPT19WndujXly5fnxYsXnDhxgvT0dDQ0NMjKysLU1JR+/frRq1cvKleu/EmfAeS9jHLs2LFs3LiRSZMmUbFiRdzd3fPt4fWm8PBwateuTUxMDHXq1HlnP7GHDx9SvXp1Tp06RevWrXONt2nTBgMDA/bt25drLC4uDlNTU3r37k3Pnj1p1qwZixYtwtnZWdVE39/fn0mTJjF//vx3zv3IkSMMGTKEpKQkli5dSv/+/Qu0KkySJK5cuYK7uzuenp5ER0dTpkwZ1NXVefnyJTo6OtjZ2eHs7IydnR16enp57jKpoaGBg4PDB/VBEwRBEARBED4fEYgJgiB8h27fvv3WJvSSJFGrVi1q165N+fLl/9/uXhceHq4KyHx9f
Xnw4AEApqamOQKyihUr5vsZSZKUoyn/tWvX0NDQoGnTplSsWJFXr17h6+tLZmYmampqKBQKateuTb9+/XB2dsbIyOiTniGvZZTlypXDzc2Nu3fvMnPmTH755Rc0NDTeep0rV67QvHlzsrKymDhx4lvDKIVCgaGhIWPGjGHmzJm5xs3MzOjatSsLFizINTZjxgwWLlzIw4cP6dixI0WKFOHMmTO0bNmS58+fExcXh7OzM9u2bXtrf63Y2FjGjRvHtm3baN++PRs2bKBs2bJvfcYPERAQwM6dO3F3d+fp06cYGBhQqFAhXr16hZaWFh06dMDZ2Rl7e3sKFSqU6/0KhYKnT58SGhqKnp4eFhYW6OvrF9j8BEEQBEEQhE8jAjFBEITv0IsXL9i6dWu+4xcvXsTHxweAYsWKqcKx7FfVqlXfGaB8j6KionIEZHfv3kWSJMqVK5cjIKtcuXK+AdmrV69yNOXPysqiVq1aVKpUibCwMK5du4ZcLle9v0WLFvTr1w8nJyeKFCny0XP/9zLKP/74g61btzJv3jwaNmzIjh07MDMzy/O9r1+/xtfXl4cPH5KcnIy/vz99+vR5ay+sTp06kZGRkauvW1ZWFjo6OqxcuZJhw4blGIuOjsbU1JShQ4eira3NggULuHXrFosWLcLDw0MVJB4+fBgtLa18751dFZacnMzSpUvp169fgYS6kZGReHp64u7uzrVr19DR0aFo0aJERESgoaFB27ZtcXZ2xsHBAQMDg0++nyAIgiAIgvD1iEBMEAThOyRJEuvXryc8PDzXmLa2NqNGjSI+Pp7bt2+rXn5+fjx9+hQAHR0datSokSMkq1Gjxn9yh8pPERsby4ULF1QB2e3bt1EoFJQpU4bmzZurArJq1arlGcjExcXlaMqflJSEqakplStXJjIykrt37wLK71f2TpX9+/enffv2bw2E8iNJErt372bChAmqZZSNGzdm4MCBREREsHTpUgYNGpRjrhEREWzevJmMjIwc10pOTqZ///5YWFjkea958+Yxb948YmNjc2wc8Pz5c0xNTTl+/Djt2rXL8Z5ffvmFVatWcejQIdq2bcuMGTMoUqQI48aNo2jRolSqVIlz587l298uNjaWsWPHsn37djp06MD69es/uSosJSUFb29v3N3dOX78ODKZjGLFihEdHY26ujqtW7emR48eODk5fdDyXEEQBEEQBOHbJgIxQRCE71RsbCy7d+/m9evXqmN6enp0796dihUr5jz55k2YOhXp8mXkWVm8KleOjVWqcCg4mIcPHyp7YMlkPH3bvzIGDYING/Ie+/13mDYNqleH+/dzjs2dC4cOQVAQJCZCuXJgZwdTp8KbzccfP4bNm+HkSeW5hQpBnTowaxbUq5fzmgcPwtq1cO8eREcrr9OoEfz6K1hZ/e+8c+egZcv8n+m335Tz+Ed8fDyXLl1SBWQ3btxALpdTqlSpHBVklpaWuZb7paWlcebMGVVT/sjISEqUKEGVKlWIiooiMDAQmUyGJEno6+vTo0cP+vXrR7Nmzd66dDAvby6jNDMzY/78+Rw5coSNGzdib2/Phg0bVEs1d+3aRWBgYJ7XCQ4OZu3atXn2Ezt//jy2trbcvn2bWrVqqY6fOXOGVq1aERAQgLm5uep4REQElSpVYvTo0Zw4cYKsrCzmz59Pp06dKFq0KAYGBly+fBljY+M85/LXX38xZMgQUlJSWLZsGX379v3oqjC5XM6ZM2dwd3dn//79JCcnU7RoUdWOrD/88AM9evSgS5culChR4qPuIQiCIAiCIHzbRCAmCILwHcvuY/T69WsKFy5M1apVc4cbt25B06bKIGroUFAo4M8/ISYGrl0jrUIFHjx4wL0rV+DgQV4EBxMcHEx6ejoAXfT06JKSwp5u3dBydc3dlywkBKpWBZkMKlbMHYh17aoMrKpVQ6Gvj+zxY2QbN0KpUuDnB9l9lyZMgE2blOc3aADx8bBuHTx/DsePw5vN3WfPhocPo
XZtMDSE8HBlmBYWBn//DdbWyvMiIuDUqdwf3I4dyuDt2jWoXz/fzzcpKYnLly+rArJr166RmZlJiRIlclSQ1axZM0eoJZfLczTlf/LkCbq6upibmxMTE0NISIgqHCtRogR9+vShX79+1KxZ893f9Df8exllhw4dmDx5MgAbN27Ezs6O33//nfz+UyB7p8SFCxfmGktJScHAwIDly5czfPhw1fGNGzcyZMgQ0tLSclS5jRs3ji1btjBmzBh+++039u3bx8CBA5EkCXV1df7+++8cAVq22NhYxowZw44dO+jYsSPr16/HxMTkgz4HUFbP+fn54e7ujru7O5GRkejr65OWloZCoaB58+Y4OzvTtWtXSpUq9cHXFwRBEARBEP5bRCAmCILwHyWXy/H39yc8PBx9fX2qV6+eZ3Pvd7KzU4ZEgYGQXQ0TFgZVqkDbtrB/f87zk5JQzJ9P6rlzaNy+jXZyMulqalQuXpyQqCgAihcvjnOlSrhlZlI3MBCt1FRkkpRnhZiUkkK0iwtq16+jFxODmiSRWaQI+lFRymDKzU154s2bymAtKwsmTVJWgSUnK39vaakMz/ITFKQ8JyMDunTJ/Uxxcf+7ZkoKSJIypHvx4n/nvEc1Wcq4cVy5ckUVkF25coX09HS2amnRNyODIAsLYrdvp1atWqoebZIk8ejRI1VT/uvXr6Ouro6pqSnx8fE5KvzKly/PoEGD6NOnDxUqVMh/Lm9+vv9aRjl27Fju3LnDkSNHGDhwIOXLl883EEtOTmbhwoUcPXqUDh065Bpv2LAhlStXZufOnapjU6ZMwd3dneDgYNWxkJAQKleuzNChQ1m3bh0//vgjPj4+PH/+HIVCwblz52jQoEGu6x8+fJihQ4eSkpLC8uXL6dOnzwdXhb148YJdu3axbds2/P390dLSQi6XI5fLadasGT169KBbt26ULl36g64rCIIgCIIg/LeJQEwQBOE/KC4ujp07dxL1TwAFoKGhgYODAzVq1PiwixUpAu3bw549OY936qSsnoqOVi5PzPb8OZiaQvnyYGKiDNOaNUM6f57Q0FBVPzLznTvp8vgxGsBzwBQI0tVlYZ8+qib+VlZWnPfyosbPP/OiQgXiihZFksmo4u9PlSdPyKhVC63bt/93b4UCbGzgzh2YOFFZ/TV5snKpZUAA5FFhBICDA5w+rQy72rRRVn/ld82kJFi0CLS0lOFd9jU/oposLS2NB9u2UWv4cLKAM0BHhYLChQvTrFkzVQVZ3bp1VZV7ISEhqqb8Z8+eJSsrizJlypCcnEx8fLzq2tWrV+fHH3+kZ8+eGBoa5v/9/ce/d6O0s7Njw4YNuLq6UqZMmTzfo6Ojwy+//IK+vj6PHz/O1a/rp59+4sCBAzx//lx1zMXFhbCwMM6dO6c6Nnz4cDw9PTE3NycqKorq1atz7Ngx5HI5hw8fpmPHjjmuGxMTw5gxY3B3d8fOzo5169Z9UFVYbGws+/btY9u2bVy6dEnV40wul9OwYUOcnZ3p3r17ge5KKQiCIAiCIPy3iEBMEAThP0aSJDZv3kxISEiuMTU1NYYPH/5hfY+0taFnT9i2LefxHj1g715l4NWo0f+Op6dDbCwYG8NPP8HSpcrljP9eVhcaCu3bk163LlFxcZh4eRFStCgdy5Xj4cOHyOVyKleujJubG0gSuikpqCkUlIiJoZWPD+VevkQGymq17L5Se/aAs7NyXt26KY81aIB08yaxbdqwv2tXYmJi0NXVxdLSksYWFuhfvKisMqteXRl6TZmi7GmW7d/XHDMGVqyAwoWVoeCuXW///MzNlctBAwJyj0mScjmqhQWcPo3C0pK/p05VVZBdunSJ5ORk9PX1adKkiSogq1+/Ptra2sTGxnL06FG8vLw4duwYycnJFCtWjPT0dFJSUgCQyWQ0bNiQUaNG4eTk9M6ND95cRtmuXTsyMzNp3LhxrqW0qampFClShMePH7Njxw6sra25ceNGjt1H9+/fT7du3
QgJCVEFVg0bNqR69eps3rwZUDbZr1KlCh06dODQoUP079+fLVu2ALB582b69++f476HDh1i6NChpKamflBVWHp6OkePHmXbtm0cOXKErKws1NXVkcvl1KlTBxcXF7p37/7elXWCIAiCIAjC9+3DuvQKgiAIX11ERESeYRgoe4adP3+e2NhYUlNT810Kl0PVqnDlCsjl/zuWkQFXryq/fvUq5/na2v8LqI4fV/6a106EBw9CcDDaCxaowpKyJibcvXuXpKQkrl+/zuDBgwHQT0pi0sKFTFi8mP5btmAQH8/df/plHd21i+DgYOWz7NsHRkbKZY8AFy4g3bhBbPnyFDpzhojgYNLS0oiNjeXSpUuk166tDPuyssDfX/keR8ec83zzmnI5eHoqe5S5uIC3tzIAzM+1a/DkCbi65j2+Y4eyyuyfAE5NTY2mTZsyZcoUTpw4QWxsLFeuXGHGjBloamqyYMECbGxsKFq0KD/88AMrVqzAxMSEHTt2EBUVxV9//UXXrl3R/6evmq6uLlpaWly5cgVXV1cMDAxo3749R48eJSsrK88pWVlZcfbsWXbu3Mndu3f5+++/efjwIUFBQSgUCtTU1KhWrRq6urpMnTqVChUqYGlpyZ07dxg/fnyOazVt2hSAS5cuqY49ffqUSpUqqX4/Z84cihQpgo+PD+3bt1eFYXPmzMkRhsXExODm5oajoyN169blwYMH72ycn/3zPmjQIEqUKEGXLl04fPgwWVlZWFlZ8dtvv/HkyRNu3rzJhAkTRBgmCIIgCIIgqIgKMUEQ/l8ICQnBz8+P5ORkSpUqRd26dSlSpMjXntZHefz4MZ6envmO379/n3379ql+r62tja6uLjo6Orl+1dHRoUtkJCPu3+dCpUqcql0bXW1tOty6hVVAABoKBcddXXnerFmO9+jq6lLs9Wvq/9PfK+KPP8h0df3fOcnJqFtYKKuxxo+HkSNh9epcPcQ2b97My5cvUcvKwvTpU3TT0igdFobFo0fopKaSrqNDwxIlCHj6FA0NDfwlifDChdnSrRuNKlXCbelSstTVOVenDnZHj7Jm2DAi/9k9EaDDkSNYP3iA9pw5sGoVPHuWu+LN3Fz5OnpUufSxXTtYvlzZzH/QILh7F/JbhvpPNZnC358gmYyQkBC0tLSwtLSkmIaGsg/bmDHwyy/KDQWsrOCvv/L93snlcvz8/FQVZBcuXCA2NhZtbW0aNmyoqiBr0KABd+/excvLiwMHDvD0n88HUAVhurq6dOrUiXHjxtGoUaM8g6U3l1GamJggSRKvX79m/vz5jBw5kvnz5zNlyhRGjRrF1q1bSUxM5PDhw3Tq1El1DTMzMzp16sTy5ctJSEjAwMCAnTt30qtXLwIDA7GwsMDMzIyEhATi4uJIS0tj6NChrFmzRjUnb29vhg4dSnp6OsuXL6d3795vDcIePnzItm3b2LJlC69fv1ZtQGBubk6fPn3o0aMHVapUyff9giAIgiAIgiACMUEQvnvnzp3D19c3xzFNTU1cXFwwNTX9SrP6eJGRkaxZsybf8dKlS1OiRAnS0tJITU3N8Wtex1JTU3G+d49uz56h+c+/Eu5pa3NWS4vRiYm46OiwNzMT+ZsVZMCvwMx/vu4HvLng8k+gNdC4cGHUdXVZkJJC36Qknujo0LtWLVWoVqVKFYoVKwZA9Xv36PavZvdHO3RgyN27vPqnSi0R8ARGA2eBKsAqZ2dM1dVx3bULdzc3gipXBkA/MZFRK1dysm1bXnboQJPbt6m1cSPx9vbIt26laNGiyp0fCxVSLpnctAn69oWdO5VVcTdvKjccOH5cGZL9m1wOJiYoypVjy48/5qjak8lkDHz8GJMrV+DRI2VV3XsEYv+mUCi4d++eKiA7f/48UVFRaGpqUr9+fWxtbWnevDklSpTg5MmTHDhwgFu3biGTyZDJZCgUCgCKFi1K9+7dGTduHBZ5VPO9uYzSzMyMoKAgWrduzZYtW/Dw8GDixIm4u
Ljg4eGBtrY2T548UVX99enTh4cPH3Ljxg3u3LlDrVq1uHz5Mo0bN8bNzY2jR48SGxuLoaEh0dHRdOzYEW9vb9TV1YmOjmbMmDHs3LmTTp06sW7dunz7mYWFheHu7s66desICgpShWAVKlSgb9++ODs7Y2lp+d6frSAIgiAIgvD/mwjEBEH4rgUHB6uWaP2bvr4+Y8eOzdET6b9i27ZtORqZZ1NXV2fEiBGqkOmDxMbCgwdgYKCsiJoyBebNUx6ztCQrKytHiGbcvDlkZKAVFsbjn3/meYsWpKamovHsGXYTJnCpWzf8q1QhLS2NpidPUvvePSKKFmVxmzbEKRREKRRoa2tTrVo1QLlsslREBDppaVR69ozat27hX7o01d9YspkFbEDZoN8WaAe0nDkT02fP6Lt9Ox7Ozvj/E/g4HjyIcXg464YOZbenJ438/dkKRAElUYZWhQsXJiYhgf2GhmyvWZO9Fy4QbGLC8bFjsY6JocXs2bxcsQKdnj0pXry4qjk7oKom8+vfH+9/LcUrHhXF8D//JHrVKkoNG6Y8+BGB2L8pFAoePnyoCsh8fX2JjIxEXV2dunXrYmtri6WlJa9fv+bYsWP4+vqiUChU4RGAsbExffr0YfTo0Tka1b+5G2VMTAza2tqoqamxdu1aIiIiGDNmDM2aNePixYtUqVKFBw8eoKGhwdq1axk5ciTx8fGcOnWKzp07Ex4eTnR0NFZWVmhra1O4cGGioqKoXbs2Fy9eRFdXN0dV2IoVK3Bzc8tVFZaYmMi+fftYtWoVt27dUh0vXbo0ffr0oVevXtSoUeODd54UBEEQBEEQBBGICYLwXfP29sbPzy/f8R49euRZMfOtS0xMZPfu3YSFhamOaWtr07lzZ6pWrVowN2nQQNnQ/sULUPtXy8mrV5XLDocOhXXrYMsW6NdPOXbuHLRs+fZrjxkDy5YhSRLe3t7cuXMn1ylTfvsNDUlC8eIFaUWLkpaWRrFy5ZCXKIFGWBhBc+cS1rgxvr6+mD56lKNCzOTlSwZu2sT2vn15bmrK7t27aRIQwGZJIg3QfeM+2VVnJ//5tTfgDnQAjqIM3bL3pNTS0kJHRwd9fX3+TE6mU0ICS8ePJ6Vw4Rxz7+XujmZmJjcWLaJbdvP/AgjE/k2SJPz9/XMEZKGhoaipqVGrVi0aNmyItrY2gYGB+Pj4kP6vfmimpqYMHjyYYcOGUbRoUeB/yyiXLVuGnp4eSUlJ9OrVizp16qj6cL148YL+/fuzefNm7t27R82aNTlz5gy3b99m+vTpJCUl4ezszJEjR1AoFKSlpVGuXDn8/PyQJInRo0eza9cu7O3tWbduHaVLl1bNKSsri2PHjrF8+XLOnTunqkw0NDTE1dWVPn36ULt2bRGCCYIgCIIgCJ/kv1cWIQiC8AGSkpLeOv7HH39QpEgRzM3NVa+KFSt+81VjhQsXZvDgwTx79ozw8HD09fWpVq0a2traBXMDT0+4fh0WLcodhsH/dl5s314ZiL3JykrZUP9N69fDsWNQvryyP5eZGQCylBQc27ShTJkyXL9+naioKNTV1WkVH4/mP72w1P/6C/2hQ5WN5LW0UAsJgXXrqDxkCJVRNmOXXb8OQOI/wVS748d5UaECsUWLUiYjgwdHjqC+aRPMm4d6lSrcX7aMV+rqhIaGkj5+PA309bHKyCD19WvuVKhA0dhYTOLjQZIIfeMxMjIyyMrKQp6UxA8KBRe0tXOFYRWfPsX8yRM8nZ15ePgw+xYtwsjIiFnR0SQEBHB31SpKmJtjXKUKpUuXRkdH5/2/L0lJyt08r16Fa9eQxcZSbcsWqg0dytChQ5EkiaCgIHx9fQk8dIj2W7ZQLy2NDOCSgQEezZsTmpnJjRs3SEpK4tmzZ6yYMoUSU6bQUVMTY6BQ2bIsdnJikK8vP06dyvnz59m7dy+xJ05wydoa7Tt3qAlobtmCZ7t2dO3aldq1a3PhwgWSk5Np3749ly9fpvDevdxAWc33S
k2NYgMHcv78eX788UcyMjLYsWMHrq6uquq1q1evouvoiHVkJC+A04CBgQE9evRg0KBB1C9fHtnkycqfucRE5UYOkydD9+45PyN/f1i7VvkZ3bql3BTh2TNlICkIgiAIgiAI/xAVYoIgfNdOnjzJ33//ne94YGAgt2/fJigoSFU9o6GhgampaY6QzNzcnCpVqlCuXLmcy+a+B+fPw+zZ0LYtlCih3HFyyxZo0wYOH4Z/h4P/9M7C1BRWroT69XNWiOUln6b6+PlB69bKHl7VqiGXJNRu30bm7g6lSkFoKMyfD5MmwbJlMG4caGrCxo2qoC4pKYnoBQso/fIlC375BbmGBtNnzULtbf96MzCAuDjl1927g6+v8vddu8Lu3QBIgwfDrl08uXqV8NhYIiMjiYyMJCIignKXLzPw1CnmVqtGeo8eyl5k/7C+fRsnb++3fuRjgeX/fF20aFFMTEwoW7YsZcqUwcTEhDJlyuT42sjISPlz9/y58nMvXx4qVVJW4+X12YeEQO3aYGBAtKsrLx48oMrhwwQDtTIyyAQqVqxISV1d9j5+jJ4k8SfwEqgFDJXJSKpQgcL+/uzZt4+xY8cy4vVrJgPPCxdGSkykKlCvbl2cnZ1JSUnJcfs6169jf+QI3hoaHFMo+LV1a4xPnuRn4LGDA2vXrqV06dI8efKEBQsWsHfvXlrGxbEdKAScNDen0NatNGrUSPnZJiRA3boQEaGsLjQ2hj17lD+7O3dCr17/u/nWrTBwIFhaKn92/fxEICYIgiAIgiDk8m2XQAiCIHyievXqce3atVwN4QGMjIyYMWMGMpkMuVxOSEgIgYGBOV7Hjh1j9erVqp37tLS0MDMzyxWWmZubY2JikiMY+c8wMQF1dWXlUWKiMnD57Tf46afcYRiAj48ymJg69dPvXbassnH9mTOwbRvqmZlQoYIyQEtMVDa6r1dPeW720tfMTGXz+38U+uf1zMoK+T/zvWdlRfnQUIqkpKCekaEMwEqUgMBAZdXbP33LAOjWDbJ35cwOVqKikO3bB/b2mFtZYf7veTs6gp4eU65fx/v06RzLcp+ZmuLh7AyAhqYmiQkJxMfH0+/yZUI0NFikpcWlhAT4J7CLi4sjLi6OR48eqSoTMzMzefP/V8lkMooXL065UqWwaNmSIlWqUF8mY+C5c9y7dw/u3cPExIRixYoplxLOnQvJyXDzJiXKl6cEgI8Plm3aED5vHsfKlcPX15fChw9TQZKwA/4uVoz09HQ2paQQLUnMfP6cerq66DZpwoYNG7hx5AhFN2wgLSmJ9draVE1Px97ePlcYppGZSaszZwgwN+eCgwOl9PSwXreOlVpa/CaTEbdwIYuXL2fHjh2Ehirr7wy0tVmjp8drZ2cKbdlC27ZtoUmT/1103Tp48gROn4YfflAeGzZMuWx3/Hjl91BLS3ncwUEZbhYurPxev2XJtCAIgiAIgvD/l6gQEwThuxcQEMDBgwdJS0tTHStVqhQuLi6qvklvk5WVxYsXL3KFZYGBgTx//lwVtunq6lK5cuU8wzJjY+Pvq+fRqlXK0CE0FNasgS5dlBVJAKNGKQOoFy9gxw7lsb/+Ui5hmzNH+fsKFaB3b+XXy5Ypl7g5OSmrnhIT4cQJOHUK7O3h0KH/3Vcuh2bNlFVmEyeCoSH8+ScEByNdu8arQoWIjo5GV1cXU1NTNDU1//ferVuhf3/lUtDskO0d1+T6dfh3T7aYGGWF0j/VZOnp6ezdu5egoCDVKerq6nTs2JE6der8731v9BBTKBTExMTkqDp789fw8HBevXpFREQEUVFRuXp/AdQDrpNzh08NDQ2KFSvGw5gYHpQowfZOnTAzM6NixYqYmJjQuH9/1CtUQP3sWeUb1q6FYcM4+fvvHHz5El9fXx49esRQYC1QDfD/59qamprUqVOHhIQEpoSG4hYfz6xff801r8oBAbju2sXOXr14UqUK4eHhvHz5EntDQ4Zu24YbsBNluGxjY8P48
eNpd/Uqalu2wOPHoKcHI0Yof8ayOTgoKxcjI3PebNEi5ffs5EllReO/ZY+LCjFBEARBEAThX0SFmCAI370qVaowbtw4Hj9+THJyMqVKlaJSpUrvHVBpaGhgZmaGmZkZ7du3zzGWkZHBs2fPcgVlHh4eBAcHq6p8ChUqlGdQZm5ujqGh4X8vLFu0SBl4ZTtwQPkCcHNTBmLPnsH06Tnfl/17W9v/BWLNmsHly8qlihERyqq0qlVhyRJluPYmdXU4elQZcqxYAampyiWbW7ciq1aNskDZsmU/7Fnecs1cYRjA3r3KKrV/qsm0tbVxdXXl1atXvHz5Em1tbapWrarseZYPNTU1DA0NMTQ0xNLS8p1TTE5OzhWeqd26BWvWUKtWLe6pqfH69WtiYmLQfP0aQ+BIZCSbN2/OcZ3tQMdnzyijrU2RIkWor6/PYaDi0qVY29nRctAgKiYkYLliBX5Fi6KmowOPHgHKqrWrV68CYFy1KsTH5znX0uHhAISWKaM819iYU6dOcTIoiEGAXenSOC5fTufOnZUVccHBynBx82bQ1c3zmqSn5z2mp6f89ebNvAMxQRAEQRAEQciHqBATBOG7FBcXx9OnT5EkiUqVKlGsWLEvPoe0tDSePn1KQEBArsDs1atXqvMMDAxy9SrL/vprzFv4j7hxI8/+bVlXrqDRuDHBv/1GQMOGREREEBwczLNnz2jj40P3Z8+oVKYMUYmJJCcn00+hYBHw5k/aVmAQoK6lhZ6eHtra2kiSREJCArq6uhyvXJkG16/nWSHW4cgR6t24wZyZM1XHQkJCaNiwIQOnTEHWqpWqTxug7OEWGgqXLil/L5PlrhAbPVrZg+7pU2V1YTYXF/DwUC6xXbky92ckKsQEQRAEQRCEfIgKMUEQviuSJHHq1CmuXLmSowdTgwYNaN++/RetxNLR0cHS0jLPCqDk5GSCgoJUAVl2aHbmzBkiIiJU55UoUSLfyrIiRYp8sWcR/js0MjMBKG9uTvnWrXOMKaZNg99/5+bFi6Tp6JCamkrWX3+RuWULl8zNeSqXU/zBA9wCAqB4cabp6JCYmEhsbKyqr5mJiclb76+ZlYX8XxtP1KtXj0GDBik3b0hN/d/A2bOwf79yOe3bDBqkXN7ZowcsXQpGRsqm+tm7mb55TUEQBEEQBEF4DyIQEwThu3Lt2rU8d5W8du0aRYoUoWnTpl9hVrnp6+tTs2ZNatasmWssISGBJ0+e5KoqO3r0KNHR0arzjIyM8gzKKleu/NblevlJS0vj4sWLPHr0iMzMTCpUqICNjQ2lSpX6pGf9/04ul5Oenq56paWlfdTX/z5WJjSUOcCyZcs4umuX6rhZbCy7gJ9GjMBj7Ngc750rlzMJMK5UiQygCeALNAJuvtF8fgYwMzqaBcArlD3RdHR0UFNTQ0dH563Pm6mhgfq/NrFQhdNpaf9b+piVpaz86t1bWen2NjVrwq5d8OOPkP1n2NhY2X9u2DAoVOid3wdBEARBEARBeJMIxARB+G5IkpRnGJbt6tWrNGnS5Jvv11WkSBHq1KmTsyH7P2JjY3MFZffv3+fgwYPExcWpzjMxMckzLDMzM8sz0EhPT2fLli1EvtG0/P79+/j7+9OnT58P78v1lSkUigIJoT4mqPr319k7lH6o7BBKW1sbbW3tXF+rZ2QoT5TJMDAwUB0vlZkJgYG0qVEDA1vbHO9ts3076ffvs2vrVnR0dKizbBlyPz/WHD2KtrY2MpmMsLAwEnx9UZs7l2E1a7IiJYWgoCBS/6nCetdOqkmFC6MmSeglJZHyT1BVokQJyMiA6Gj4p7cY27eDv79yB8nnz3NeJDFReaxUqf/1CevWTdlc/84d5UYIderAuXPKsSpVPuozFgRBEARBEP7/EoGYIAjfjczMTOLzafQNkJiYSLFixcjKykJTUxMNDQ00NTVzff2lxz70GpUrV8bCwkI1pv7P8rSoqKhcYdmNGzfYvXs3SUlJAMhkMsqVK5erV
1l6enqOMOzNz/T48ePK5W7vIEkSGRkZnxwgFcTXmf8sG/xQampqeYZP//5aR0eHwoULY2ho+M5zP+ZrbW1tZcP5t/mnh9jYMWMY+0YPMQCOHaODoSEd3ujjBSgrqho0wMnJiWfPnqEWFUVmWhqLFy9WBaBZWVnUB7oBUmYmDg4OWFlZYWRkxPz587lw4QIJbwmgwo2NASgTGsqTf85bsWIFZYODaaxQQK1ayhODg5WbE+RVtbl9u/J18KBy99FsWlo5q8l8fJS//mtpqCAIgiAIgiC8iwjEBEH4bmhoaKClpUVGduVMHubMmUNWVhZZWVlkZmaqXm/+/m1jKSkpH/W+7K8/1z4mbwvSyvxTkaNQKMjKyiIjIwM/Pz+uXLlCamoqCoWCIUOGqM77t1evXtGuXTuSkpLeGkK97XN/G5lM9t5Bkb6+PsWLF88VThVUIPXOEOq/omtX2LYNKTiYEJmM+/fvk3DwIM4BAcxLS2NO4cKkpqayAhgFGD9+TAlbW0aMGIGVlRX1d+2CtWsZvWULpxISGDp0KM+ePQOUf87eVvX2zNSUFF1d6t24wZMqVUhOTubSpUs8uXgRa5mMi5qatJEkZD17/i8ce1PnztCxIwweDA0b5v+MgYHKvmKdOokKMUEQBEEQBOGDfSf/5S8IgqCs7qlZsyY3btzIc7x27do4ODh84VnlJJfLPzqM+5Cx1NRUYmNjiY2NJT4+noSEBBISElShVmZmJunp6SgUCuDdy+BKlSpF6dKlPyhk+pAQ6ltfxvpNWbUK4uKUOzMCHD4MISEkJydzs0kT7jx/zquEBH7JzCSmYkWWShKFgInAE11dXvzwA3OtrbGyssJaRwepfXuWBQUpQyg1NVizBnbvJsjMjNpt2pCYmAiAhZ4ea5s2RVdXF/mhQwDY+PoCEF+0KHetrQHI0tTkbMuW2B09Ss8DByg7YADTnZwofPAgUySJeS4umE2bxqJFi3B0dMz7e29qmrMyDMDSUrkjZfnyyl0j16yB4sWVodib4uP/t+Nk9s6Vq1ZB0aLK18iRn/LpC4IgCIIgCN8JmfS5yhUEQRC+gtTUVLZv3054eHiO46VKlaJv377oZfcj+o9KT08nPDyc0NDQt77e7CcGoKuri4mJCWXKlKF06dKUKVMmxys8PBx/f/8871m8eHFGjhwpQqtvhKJ8edRevsxzrCIQrq2NhYUF7cqWZai/P+WCg5Fpa6PWqROyJUuUOzS+yd8fpk2Dq1eRwsN5raHBjrQ0pksSqUC5cuVYu3Yt7XV0UGvVKs/7Pq9QgW39++c41iEkhLq+vqi/eAHlysHIkfjZ2jJk6FCuX78OQKVKlZg3bx7dunX7Xygrk8GIEcoQ600uLsqAKyICDA2V/cRmzVL2GcsxmefKQC0vFSrk7lcmCIIgCIIg/L8kAjFBEL47WVlZ3LlzhydPngBgZmaGtbU1mpqaX3lm+cvMzCQiIuKtIVdYWBhRUVE53qelpZUr3MrrVaRIkbcGWgkJCaxfv57k5ORcYz169MDCwqLAn1l4u5SUFB49esT9+/d58OAB9+/f5/79+7z8JwxTV1fH3NwcKyurHC8zM7MPXvrp4+PD2LFjefDggepY48aN2bhxI5aWliQlJdGpUyd8fX0pXLgwiYmJ/Prrr8ycOROFQkFAQAAhISE8ffqU7du307FjR6ZMmZLv/c6fP8/w4cNV96tYsSJz5syhZ8+e38+yVUEQBEEQBOGbJgIxQRC+e5mZmTx58oTk5GSMjY0xMTH5YtVOcrmcyMjIPMOtN38fGRmZo7+YhoZGnpVc/34VK1aswJ4lOjqaU6dOERAQgCRJGBsb06JFC6pWrVog1xfylpmZSUBAgCrwyn4FBQWpfiZMTU1zhF7Vq1enatWqee4Y+r4yMjJYunQp8+fPJzY2FlCGbK6urixYsACjfyrJnj9/TosWLXjx4gXlypUjLCyM3r17s2nTplw/e5mZmWhpadGmTRtOnjz51vtLk
sThw4cZO3asqj9Z+fLlmTFjBr1790ZLS+ujn00QBEEQBEEQ3kUEYoIgfNcCAwPx8vIiJSVFdax8+fL06NEDfX39j76uQqEgKioqz3DrzVd4eLiqTxcoe3UZGxvnCrb+HX4ZGhq+s6/X55KVlYVcLkdbW/ur3P97JZfLefbsWY7Q68GDB/j7+6t2xSxTpgzVq1fPEX5ZWlpSqFChApvHq1evmDBhAvv27VM1x9fT02Pq1KmMGTMmx5+L8+fPY2dnR1JSEg0bNuTRo0c0bNiQI0eO5FtxWaxYMfT09Hj16tV7zUcul7Nz505++eUXwsLCADAxMWHKlCkMGDDgk0I/QRAEQRAEQciPCMQEQfhuRUdHs2bNGuRyea6xChUq0Ldv31wVLpIkERsb+84eXWFhYTl22pPJZKrG82+r6CpVqhTq6uqf/dmFr0eSJEJCQnIsc7x//z4PHz4kNTUVUIZGNWrUyFX1Vbx48c82Jx8fHyZNmoSfn5/quImJCfPnz8fZ2TnXUsX169fz448/IkkSPXv25OLFixQvXpwLFy5QpEiRfO9Vv359bt++TWZm5gdVL6anp7N27Vp+/fVXEhISkCSJUqVK8fPPPzN06ND/fP8/QRAEQRAE4dsiAjFBEL5bx48f5+rVq/mOa2lp5WhQn13plZ6enuO8EiVKvHPpopGR0Tfdo0z4PF6/fp1rqeP9+/dJSEgAQF9fP0fglf21sbHxF1m2m5aWxvLly1m0aFGO/nP169dn/vz5tGjRItc8srKyGD16NGvWrAFgxowZeHt7Ex0dzZUrVzAxMXnrPUePHs3KlSt59OgR1apV++A5JyYmsmTJEhYsWEBmZiZyuZzixYszfvx4RowYQeHChT/4moIgCIIgCILwbyIQEwThu7Vt2zaev2VHuZMnTxIXF5dvyFW6dGlKly4tlg4KxMfHqyq+3qz8ioyMBJThqoWFRa4G9+XLl/8qS19fvnzJpEmT2L9/v2o5pkwmo2vXrsycORMrK6s83xcTE0OXLl04f/48ampqbNq0id27d/P3339z8eJFatSo8c57nzx5knbt2vHbb78xderUj36GyMhI5s6dy+rVq9HU1CQjI4MiRYowduxYRo0aRbFixT762oIgCIIgCIIgAjFBEL5b+/fv5/79+/mOu7i4UKVKlS84I+Fbl5qaqtrZ8c1X9s6OampqVKlSJddSx8qVK3/13RElSeLEiRNMnTqVW7duqY5raWkxcuRIfvrpp7dWdz169IiOHTvy8uVLdHR08PLywsPDg23btnH8+HFatWr1XvPIyMhAW1ub1q1bc+rUqU9+rufPnzNz5ky2b99OkSJFSE1NRVdXl5EjRzJu3DgMDQ0/+R6CIAiCIAjC/z8iEBME4bsVGBjIrl278hwrVKgQY8aM+eohhvB15LWz44MHD3jy5IlqZ8eKFSvmqvj61J0dP4eUlBRWrlzJ4sWLef36tep4iRIl+OWXXxgyZMhbe34BHDlyBGdnZzIyMihevDgnT57k0KFDTJ8+nW3bttGnT58PmlPRokXR09MjNDT0o54pL/fv32fq1KkcOnSIkiVLkpiYiJqaGsOGDWPChAkYGxsX2L0EQRAEQRCE758IxARB+G5JksSxY8e4fv16juOampr07NmTSpUqfaWZCV9K9s6O/25w/+bOjqVLl84VfBX0zo6fw9OnT5k6dWqOZZEAVapUYdq0aTg7O6OlpfXWa0iSxMKFC/n555/R1NTEzMyMEydOcO7cOfr27cucOXOYNm3aB8+tfv363Lp1i6ysrALvlXbp0iV++eUXLl68SPny5YmOjiYrK4vBgwczadIkypUrV6D3EwRBEARBEL5PIhATBOG7JkkSQUFB3L17l5SUFIyMjKhfvz5Fixb92lMTCpAkSbx69SrXUse8dnZ8s7l99erVKVGixFee/fvLDnmnTZvG7du3AWVvMEmSsLW1ZfLkybRt2/a9Qqi0tDSGDBnCjh07UFdXp1mzZhw8e
JBbt27Rvn17+vbty4YNGz4q0MpurH///n2qV6/+we9/l+zPYfLkydy9e5dq1aoRFhZGSkoK/fr145dffhGBtyAIgiAIgvBWIhATBEEQ/lPe3Nnxzcqv+Ph4QLmz45uh15fe2fFzSExMZPXq1SxevFi1W6S6ujoKhQJnZ2cmTpxInTp13vt6YWFhODk5cevWLeRyOd27d2f79u0EBATQrFkzGjduzOHDhz9659SCaqz/LgqFgt27dzN9+nSeP39OnTp1ePHiBbGxsbi6ujJlyhSqVq362e4vCIIgCIIg/HeJQEwQBEH4JiUkJORa6vi2nR2zQ7AKFSp8lZ0dP4fAwEBmzJihWhaZHehpa2szdOhQxo4dS8WKFT/omjdu3MDBwYHExESSkpL46aefWLhwIWFhYTRq1AhDQ0POnz9P4cKFP3reaWlp6OrqFlhj/XfJyMhg/fr1zJkzh9jYWJo0aYK/vz8RERH06NGDqVOnvtcOmYIgCIIgCML/HyIQEwRBEL6q99nZ0dzcPFfF17ews+PnoFAoOHLkCDNmzMDPzw8ADQ0NsrKyKFmyJGPHjmXYsGEUK1bsg6+9e/du+vfvT6FChYiJiWHJkiWMHTuWhIQEbGxsiI2N5cqVK5QpU+aTn+NzNNZ/l6SkJJYtW8aCBQuQJIkWLVpw9+5dgoODcXJyYtq0adStW/eLzUcQBEEQBEH4dolATBAEQfgisnd2/HfVV1BQEAqFAvjv7Oz4OcTHx7N69WqWLFlCdHQ0ADo6OqSlpVG1alUmTpyIq6vrR30WCoWC6dOnM3fuXIyMjIiLi2PHjh10796dzMxM7OzsuHbtGpcuXSqwnl/16tXj9u3bZGRkoK6uXiDXfF9RUVHMmzeP1atXU7hwYdq0acO1a9cICgqiY8eOTJs2jcaNG3/ROQmCIAiCIAjfFhGICYIgCAVKoVDw7NmzXBVf/97Z8d99viwtLT9pmd5/1cOHD5k9ezb79+9X7cqoq6tLSkoKNjY2TJw4ETs7u49eBpqYmIibmxuHDh3C2NiY9PR0vL29sbGxQZIkBg4ciLu7OydOnKBly5YF9lyjRo1i1apV+Pn5YW1tXWDX/RDBwcHMmjWLrVu3Uq5cOTp06MD58+d5+PAhrVq1Ytq0adja2v5ne8sJgiAIgiAIH08EYoIgCMJHyWtnxwcPHvDw4UNSUlIA5c6O/674+q/t7Pg5yOVyvL29mTVrFnfv3gVAU1MTDQ0NUlNT6dq1KxMnTqRhw4afdJ+nT5/i4ODA8+fP0dXVRU9Pj+PHj2NhYQHArFmz+PXXX3F3d8fV1fWTn+tNJ06coH379syZM4dp06YV6LU/1MOHD5k2bRoHDx7EysqKTp06cfz4cfz8/GjWrBnTpk177905BUEQBEEQhO+DCMQEQRCEd3r9+nWeDe7z2tnxzcqv0qVLi5DhDdHR0fz5558sW7aMmJgYAIoUKUJqaipqamoMGDCAn376icqVK3/yvc6dO0e3bt3Q1tYmMTGRSpUqcfToUVV/sK1bt9K/f39+//13pkyZ8sn3+7fk5GQKFSpEq1at8PHxKfDrf4wrV64wefJkzp07R5MmTXBwcODAgQNcu3aN+vXrM23aNOzt7cXPrCAIgiAIwv8DIhATBEEQVN5nZ8dq1arlqvr6nnZ2/Bzu3LnD3Llz2b9/P3K5HJlMRqlSpYiMjKRYsWKMGjWKESNGULJkyQK535o1axg9ejRVq1YlMDAQW1tb9u3bR5EiRQA4efIkdnZ2DBgwgLVr1362AMjAwABdXV3Cw8M/y/U/hiRJnDx5ksmTJ3P79m3s7Oywt7dn586dXLhwAWtra6ZOnUrXrl3Fz7QgCIIgCMJ3TARigiAI/w9l7+z47/ArODgYyHtnx+rVq1O5cmU0NTW/8uz/GzIzMzlw4AC///479+7dA5SBoqGhIaGhoZiZmfHTTz/Rr18/9PT0CuyeY8aMYc2aNTRt2pRLly7Rp08fNmzYgJaWFqAM5
2xsbLCxscHb2/uz7tRZr149bt26RUZGxje3I6hCoWDv3r1MmzaNoKAgXF1dsbOzY9OmTfj4+GBhYcHUqVNxdnb+5uYuCIIgCIIgfDoRiAmCIHzHMjMzCQwMzFXx9e+dHf/d4L5atWr/L3Z2/BwiIyNZvXo1K1euJDY2FoDixYujpaVFeHg4DRo0YOLEiXTu3LlAd1+Mjo6me/fuXLhwgebNm3PmzBmmTJnCb7/9pqoACwkJoVGjRhgZGeHr60uhQoUK7P55GTlyJKtXr+bmzZvUqVPns97rY2VmZrJp0yZmzZpFdHQ0P/74I+3atWPNmjUcOXKEypUrM3nyZNzc3FShoiAIgiAIgvDfJwIxQRCE70BeOzs+ePCAx48fq3Z2NDY2zrXU8f/rzo6fw/Xr15k/fz5eXl7I5XIATE1NSUhIIDo6Gnt7eyZMmICNjU2BL1F88OAB9vb2JCQkYGVlxYULF1i9ejU//vij6pz4+HiaNWtGYmIiV65cwdjYuEDnkJfjx4/ToUMHZs2axYwZMz77/T5FcnIyK1asYP78+WRlZTF+/HhatWrF8uXLOXDgAOXLl+eXX36hf//+IiwWBEEQBEH4DohATBAE4T9EkiRCQ0NzVXy9ubNj0aJFqVGjRo4G99WrV8fQ0PArz/77k56ezt69e/njjz948OABoFwWWbVqVZ4+fUpmZia9e/dm/Pjxqp0dC+q+V69exd/fn9jYWC5cuEBYWBgymYwHDx7g4eGBg4OD6vyMjAw6duzIzZs3uXTpEpaWlgU2l7eJj4+naNGitGzZkjNnznyRe36qmJgY5s+fz4oVK9DX12fq1Kk0b96cxYsX4+npibGxMRMnTmTIkCEFttRVEARBEARB+PJEICYIgvCNioqKyhV8vbmzo56eXq6ljmJnxy8jNDSU1atXs3r1atX3w9DQkAoVKuDn50fhwoUZNmwYo0aNonTp0gV67/T0dLZs2UJERESO42lpaezbt48dO3bQqFEj1XFJkujXrx8eHh6cPHkSW1vbAp3PuxQpUgRdXd1c8/3WhYSEMHv2bDZv3oyJiQmzZs2iYcOGLFiwgB07dlC8eHHGjx/P8OHDRZWlIAiCIAjCf5AIxARBEL6yN3d2fLPJfXaAoKmpyf+xd99xNe9/AMdfp70lESFkJCGyyt7rUvbe69pc68q415advS/32ptsykjKCFkVKYWi0t516vz+6HZ+jk7hXsT1eT4ePcr38x2fc5xO57zP+/N+W1paKhS3r1q1KmXLlhVd8L4imUyGp6cny5cvx8XFBZlMhkwmw9LSEi0tLe7du4eZmRm//PILQ4cO/WJBkkuXLnHt2jWlY6ampgwfPlxh22+//cb8+fPZu3cvvXv3/iJzyk+tWrW4d+8eKSkpaGpqfvXr/1tPnjxh9uzZHDp0iCpVqrBw4UKqV6/OkiVL2LFjB3p6ekycOJHx48djaGhY0NMVBEEQBEEQPpIIiAmfRUZGBuHh4aiqqmJiYiLepAuCEikpKfj7++fK+Hq3s2OFChVyZXyJzo4FKyUlhf3797N06VL8/f2RSCSoqqpiZ2dHREQET548oUaNGkydOpXu3bt/8f+r1atXExsbq3RMIpHg6Ogon8P27dsZNmwYTk5O/Prrr190XnnJKax/8+ZN6tatWyBz+Bxu377NjBkzcHV1xdbWFicnJ8qXL8/SpUvlXTzHjh3LL7/8IpYnC4IgCIIgfAdEQEz4127dusXly5dJTU0FoFChQrRr1w4LC4sCnpkgFAxlnR0fP37Ms2fP5J0dy5QpkyvwJTo7fltevHjB+vXr2bhxIwkJCQAUKVKEevXqcf/+fUJDQ2ndujVTp06lRYsWX2WZ6q1btzh48GC+2WfTpk1DW1ub8+fP89NPPzFixAjWr19fYMtoT58+TYcOHfj999+ZM2dOgczhc3J1dWX69OncuXOHtm3bsnjxYooXL86KFSvYuHEjMpmMUaNGMWXKlK/SuEAQBEEQBEH4Z
0RATPhX7t27h4uLS67tEomEwYMHU7p06QKYlSB8HTmdHd9d5vjo0SOlnR3frfVVpUoVDAwMCnj2gjIymYwrV66wcuVKTp8+Ld+Wk6l3+fJlkpKS6N27N5MnT8ba2vqrzW337t0MGzaMgQMHYmpqqnSfYsWKMWrUKO7du0fjxo1p2rQpx44dQ01N7avN831RUVEYGxvTtGlTLl++XGDz+JxkMhlHjhxh5syZPH36lN69ezNv3jwMDQ1xdnZm7dq1pKenM3z4cKZOnSr+FgqCIAiCIHyDREBM+MdkMhlr1qzJc+lOpUqVCqRejSB8bh/b2fH9jC/R2fH7kZSUxO7du1m5ciVPnz5FIpEgkUho3bo16urqnD9/Hk1NTUaMGMGECRO+aoAjMzOTGTNmsHTpUgYNGsS8efPYunUrqqqqufbt2bMnOjo62NraYmpqytWrV9HV1f1qc82Lvr4+2traREREFPRUPiupVMqOHTuYM2cOERERDB8+nNmzZ6Otrc3atWtxdnYmISGBwYMHM336dMqVK1fQUxYEQRAEQRD+JgJiwj+WlJTE8uXL8xzX0tIqsJo1gvBPvdvZ8d3Mr5zA7/udHXN+NjU1FZ0dv0NBQUGsW7eOrVu3kpiYCGQv+7a3t+f169e4urpiamrKhAkTGDFixFcvmh4fH0+fPn04e/Ysy5cvZ+LEifz+++/s2LGDCRMmkJSUBICRkREtWrTA1NSUhg0bkpSUhJeX1zezZM/GxoZ79+6RlJSEjo5OQU/ns0tJSWHdunUsXryYtLQ0Jk6cyNSpU1FVVWXjxo2sWLGCqKgo+vXrx4wZM6hUqVJBT1kQBEEQBOGHJwJiwj+Wnp6Ok5MTeT2EoqKiuHDhAo0aNaJx48Y0btyYMmXKfOVZCoJy8fHx+Pr65sr6erezY+XKlXNlfYnOjt+/rKwsXF1dcXZ25ty5c6ioqJCZmYmlpSXNmjXj1q1beHt7Y2VlxZQpU+jTpw8aGhpffZ7Pnj3D3t6esLAw9u/fT9u2bTl69Chdu3Zl0aJFODo6kpiYSGZmJgYGBqSnp9OuXTt8fHzw9PSkcuXKX33OeRk9ejQbN27k+vXr1K9fv6Cn88XExsaydOlSnJ2d0dLSYsaMGYwZMwaZTMbWrVtZunQpb968oUePHsycOZOqVasW9JQFQRAEQRB+WCIgJvwrBw4cwN/fX+mYkZERfn5+uLu78/jxYwBKly4tD441atSIypUri6wa4YtS1tnx8ePHhISEAKKz448kPj6eP//8k9WrVxMYGIiqqipZWVm0b9+eihUr4uLiQlBQEE2bNmXq1Km0a9euwJ6fLl26RLdu3TA2NubkyZNYWFjw8OFD7Ozs+Omnn9i/f7/C3GQyGf379+fQoUO4urrSqFGjApl3XlxcXHBwcOC3335j7ty5BT2dL+7169fypa3Fixdnzpw5DBo0CKlUys6dO3FyciIkJITOnTsza9YsbGxsCnrKgiAIgiAIPxwREBP+ldjYWHbs2EF8fLzC9pIlSzJgwAB5VkVUVBQeHh64u7tz7do17t69S2ZmJkWLFqVhw4byAJm1tXWBFn8Wvl85nR3fL3D/bmdHMzMzpZ0dtbW1C3j2wpf05MkT1q5dy44dO0hJSUEmk6Gnp0f//v3R0NBgz549REdH061bN6ZOnUrt2rULbK4ymYwNGzYwYcIEmjdvzoEDByhcuDDR0dHUqVMHfX19rl+/nqsu2KxZs1i4cCH79++nZ8+eBTT7vIWHh1O8eHGaNGnClStXCno6X01AQAC//fYb+/fvx8LCgoULF9KlSxekUim7du1i0aJFBAYG0r59e2bPno2trW1BT1kQBEEQBOGHIQJigpxUKuXBgwcEBgYCUKFCBapVq/bBAFVycjK3bt0iODgYFRUVLCwssLGxyTe7JiEhgRs3buDu7o67uzs3b94kLS0NfX19GjRoIF9mWadOHTQ1NT/r7RS+b1lZWQQHB+da6vhuZ0cTE5NcgS/R2
fHHkpmZydmzZ1m9ejWurq6oqakhlUopX748/fv359WrV+zevRsVFRWGDBnCL7/8grm5eYHOOT09nXHjxrFlyxYmTpzIsmXL5PNu164d9+7dw9vbm7Jlyyoct2XLFn7++WeWLVvGlClTCmbyH0FPTw8tLS3evn1b0FP56u7evcuMGTM4f/48derUYfHixbRo0QKpVMqBAwdYuHAhfn5+tGjRgtmzZ9OkSZOCnrIgCIIgCMJ/ngiICQCkpqaya9cuwsLCFLabmpoyYMCAfxyUSklJQSqVoqenl+/So7S0NG7fvs21a9dwd3fn+vXrJCQkoKmpSb169eQZZHZ2dujr6/+juQjfl/c7O+Zkfj1+/Fje2bFQoUJUq1ZNobi9lZUVRYsWLeDZCwUlJiaGHTt2sGbNGkJCQuQBpVatWtG+fXuuXbvGsWPHKFq0KOPGjWPUqFEUKVKkoKdNZGQk3bp1w8vLi02bNjFkyBD52OTJk1m9ejUXLlygefPmCsedOXMGe3t7Ro4cydq1a7/pJeg1atTg/v37xMfH/7DP45cvX8bR0ZGbN2/SqlUrFi9eTK1atcjKyuLo0aMsWLCA+/fv06hRI2bNmkWrVq2+6f9TQRAEQRCE75kIiAkAnDt3jps3byods7W1pU2bNp90vvDwcM6dO0dwcDCQXU+sWbNmH11AOCdbLWeJpbu7O2/fvkVVVRUbGxt5BlnDhg2/iTezwr8TFRWVK+Pr3c6O2traCp0dc75EZ0chx6NHj1i7di1//fUX6enpyGQyNDU1GTx4MNWqVWPfvn1cu3aNSpUqMXnyZPr37//NLJV98OABDg4OJCUlcfToURo2bCgf27VrFwMGDMDZ2ZkJEyYoHHf37l0aN25MixYtOHr0KKqqql976p9k1KhRbNq0iStXrvzQGVAymYzjx48zc+ZM/Pz86N69OwsWLKBSpUrIZDJOnTrF/PnzuX37NnXr1mXWrFl06NBBPNcJgiAIgiB8ZiIgJiCTyVi2bBkpKSlKx7W1tZk2bdpHny86OpotW7aQlpaWa6xLly5Uq1btH83R399fHhxzd3fn5cuXAFhZWckzyBo3bkzJkiU/+fzC15GQkKBQ4yvn5zdv3gDKOztaWVlRrlw50dmR7Jp94eHh6OjoUKpUqR/+DbJUKsXFxYU1a9Zw9epV1NXVycjIoFSpUowZMwZdXV02bNiAv78/9evXZ+rUqdjb239Tj6Xjx4/Tr18/KlasyIkTJzAzM5OPeXt707BhQ3r16sWOHTsU/r+Dg4Oxs7PDzMyMy5cvo6OjUxDT/yTHjh2jS5cuzJo1i/nz5xf0dApcTh2x33//nbCwMIYOHcpvv/1GyZIlkclkXLx4kfnz5+Ph4YG1tTWzZs2iS5cu39TjVxAEQRAE4XsmAmICMpmMefPm5bvPrl27KFOmDGXKlMHMzEzhe+nSpRWWVJ48eZK7d+8qPY+hoSHjx4//LG/kQ0JCFDLInjx5AkC5cuUUOllWqFDhhw8cfG2pqam5Ojs+evQoV2fH97O+KlasKDo7KpGens7Jkyd59OiRfJuRkRGdO3emVKlSBTizgvH27Vu2bdvG+vXrefXqFRoaGqSnp9OoUSOGDx9OSEgI69atIyIiAgcHB6ZOnUr9+vULetoKZDIZCxcuZPbs2XTr1o2dO3cqFMoPDw+ndu3amJqacvXqVbS0tORjMTExNGjQgNTUVG7cuEGxYsUK4iZ8slevXlG6dGkaNmzItWvXCno634zU1FQ2bNjAwoULSU5OZsKECfz6668ULlwYgKtXrzJ//nzc3NywtLRk5syZ9OzZUzSgEQRBEARB+JdEQEwAYNOmTYSHhysdU1VVJS4ujpCQEF68eEFISIg8oydH8eLF5UEyS0vLfD/BrlevHlWqVKFkyZKfdYlPeHi4QidLHx8fZDIZxYsXl2ePNW7cmKpVq4pP2D+TjIwMnj17livwJTo7fl4HDx7Ez88v13ZNTU1Gjx79wzQLuHfvH
mvXrmXPnj1kZmYC2cHVfv360a1bNy5cuMC2bduQSqUMHDiQSZMmYWFhUcCzzi05OZkhQ4Zw4MAB5syZw+zZsxWek9LT02nevDmBgYF4e3srZL2mpaXRpk0bHj58iKen5zd5+/Iik8nQ1dVFS0uL6Ojogp7ONycuLo7ly5ezcuVKNDQ0+PXXXxk/frw8+8/Ly4sFCxZw5swZKlSogKOjI/379xcfIgiCIAiCIPxDIiAmANn1d44cOaJ0rGvXrrlqf6WmpvLq1SuFIFnOz9WqVZN/sq3M8uXLSUxMRFVVleLFi2Nubo6FhQXly5enXLly8i9jY+N/ldkVFxeHp6enfInl7du3ycjIwNDQkIYNG8qDZLVq1RJvKD7g3c6O7y559Pf3Jz09Hcju7Ph+xleVKlUoVKhQAc/++xYVFcW6devyHG/YsCEtWrT4ijP6ujIyMjhy5Ahr167F09MTLS0tUlNTKVasGOPGjcPOzo7t27dz8OBBChUqxOjRoxk7diwmJiYFPXWlXr16RadOnfDz8+Ovv/6ia9euufYZOXIkf/zxB1euXFHIbMvKyqJfv34cPXoUNzc3GjRo8DWn/llYW1vz4MEDoqOj8/078SMLDw9nwYIFbN68GWNjY37//XeGDBki/zt19+5dFixYwLFjxyhTpgzTp09n8ODBoiOzIAiCIAjCJxIBMUHuxo0bXL58WR7g0NDQoHnz5tSrV++TznPp0qU8l8MkJiZy8uRJXr16RVJSksKYioqKPKsIsrNfSpQoQbly5ahSpQoVK1ZUCJjp6el90rxSUlK4efOmfImll5cXSUlJ6OjoYGtrK19iaWtr+13U4/kSZDIZr1+/zpXx9X5nx/czvkRnxy/n4cOHHD16NM9xc3Nz+vfv/xVn9HW8efOGLVu2sHHjRt68eYO2tjYpKSnUrl2bCRMmYGhoiLOzM25ubpQtW5ZJkyYxZMgQhWWH3xovLy86d+6MpqYmJ06coEaNGrn22bx5MyNHjmTr1q0MGzZMYczR0ZElS5Zw8OBBunXr9pVm/XmNHDmSzZs34+rq+p8O5H4OQUFB/Pbbb+zdu5fy5cuzYMECunfvLs8mfPjwIYsWLeLAgQOUKFGCadOmMXz48B/275cgCIIgCMKnEgExQUFaWhovXrwAspe6/ZNPnFNTU/njjz+IjIxU2K6urs7AgQPly3/i4+NzZZcFBATw7NkzXr169cElNbq6upQoUQJzc3MsLS2xtLTE3NyccuXKYWZmhoaGRr7HZ2RkcO/ePfkSy2vXrhETE4O6ujq1a9eWZ5A1aNAAQ0PDT74fvnXvd3bMyfyKiYkBcnd2zPm5ZMmSoibbVxQQEMDevXvzHA8NDaVkyZJ06tSJ6tWrf/f/Nzdv3mTt2rUcPHgQmUyGRCIhMzOT7t27M2bMGJ4/f86KFSt48OABtWvXZurUqXTp0uWbr6f0559/MmLECOrUqcPRo0eV1v3y8PCgWbNmjBgxgvXr1yuMbdq0iVGjRrFixQomTZr0tab92R06dIgePXowY8YMFi5cWNDT+S7cv3+fGTNmcObMGWxsbFi8eDGtWrWS/64/efKExYsXs3v3booUKcLkyZMZNWoU+vr6BTxzQRAEQRCEb5sIiAkfJS0tDRUVlY9eWpiSkoKnpyd+fn5kZGRQtmxZGjZs+ElZROnp6YSGhsqDZc+fP+fJkyc8e/aMly9f8vbtW6RSqdJjJRIJBgYG8gyzypUrU716dSpUqEC5cuUoUaJErjpiWVlZ+Pr6ypdYuru78/r1ayQSCdWrV5dnkDVq1IjixYt/9O0oaAkJCfj6+ubK+nq3s6OFhUWurK+yZct+1hpvwj8jlUpxdnbOlVGZ49GjR1y4cIH4+HjKli2Lg4MDnTp1omHDht98kChHWloaBw4cYN26ddy+fRsdHR2Sk5MxNDRk1KhR9O/fnzNnzuDs7MyrV69o3749U6dOpUmTJt98ADAzM5Nff/2VFStWMGTIEDZs2KD0g
4ZXr15Rq1YtKleujKurq8Jz7alTp3BwcGDs2LE4Ozt/87c5P8+fP8fc3Jz69etz/fr1gp7Od8Xd3R1HR0c8PT1p1qwZTk5O1K1bVz4eFBTEkiVL2LFjB/r6+kycOJFx48b9Jz/QEQRBEARB+BxEQEzI17Nnz7h06ZI8MFS+fHlat279TSyPk8lkREZGygNmQUFB+Pr6EhAQwMuXL4mMjCQlJUXpsSoqKhgaGlKiRAnKli1L5cqVqVGjBpaWlpQrV05e2yYoKEi+xNLd3Z3AwEAAKlWqJM8ga9SoEWXLli3wN6kf6uwokUioUKFCrsCX6Oz47QsICODAgQPyQvI5Hjx4wLFjx1i4cCG1atXi+PHjnDhxgrCwMIyMjOjQoQOdOnWidevW3+RSwtDQUDZu3MiWLVuIjIxET0+PxMREqlSpwsSJE2nWrBlbt25l06ZNpKSk0LdvXyZPnpyrpuG3Ki4ujl69enHhwgVWrlyZZ4fdlJQUGjduTHh4ON7e3grZY97e3jRp0oTWrVtz+PDh7z5ILZPJ0NHRQVNTk9jY2IKezndHJpNx6tQpHB0defz4MV26dGHBggVYWlrK93n58iXLli1j69ataGhoMG7cOCZOnIixsXEBzlwQBEEQBOHbIwJiQp4CAgLYt28f7z9EtLS0GD58OEZGRgU0s4+XmJjIy5cvCQkJ4enTpzx69IinT5/KA2YJCQlKj1NTU5MHzMqUKSMPmJmamvLy5Utu3bqFu7s7Dx8+BKBUqVIKnSwtLS3zDJBFRETg7u7O8+fPUVVVpXLlyjRu3Pija6JJpVICAgJyFbgPCAjI1dnx3SWPlStXFrVlvmORkZGcO3eOkJAQeWDM2NiY9evX4+fnR+fOnTl8+DAAd+7ckQfHHj9+jJaWFq1ataJTp0507NixQAPaMpkMDw8P1q5dy9GjR1FVVUVFRYXU1FQ6duzIhAkTKFasGCtWrGDv3r1oa2szcuRIxo8fr9Bt8VsXEBBAx44dCQ8P58CBA7Ru3VrpfjKZjIEDB3L48GE8PDywsbGRjz1//hxbW1vMzc1xc3P7z/z+VqtWjUePHhEREfFNfLjyPcrMzGTPnj389ttvvHz5kkGDBjFnzhxKly4t3+fNmzesWLGCDRs2IJFIGDVqFJMnT/6uMpwFQRAEQRC+JBEQE5SSyWRs2rSJiIgIpeM1atTAwcHhk86ZnJyMVCpFX1+/wLOpckilUsLCwggODsbX15cHDx7IA2YRERHEx8crFPrPoaGhgaGhIcbGxvLMm+joaJ4/f05WVhZFihSRL69s3LgxNWrUQE1NjbCwMHbu3ElGRobC+QwNDRk2bJhCFk9WVhYhISG5Mr7e7exYrFixXBlforPj9yE+Pp4HDx4QGxtLkSJFqF69er5ZXHl1glVXV8fV1ZXLly9ToUIFbt++rbBEKiAggBMnTnD8+HE8PT2RSCTUr1+fTp064eDgQIUKFb7EzcslJSWFvXv3snbtWu7fv4++vr68qcXQoUMZM2YMr169YtmyZZw9e5ZSpUoxceJEhg8fjoGBwVeZ4+dy8eJFevTogYmJCS4uLlSqVCnPfVetWsWkSZPYu3cvvXv3lm+Pjo6mfv36SKVSvLy8/lOBoxEjRrB161bOnDlDu3btCno637W0tDQ2b97MggULiI+PZ+zYsTg6OlKkSBH5Pm/fvmXVqlWsXbuWjIwMhg8fzrRp0yhVqlQBzlwQBEEQBKHgiYDYD+bNmze8fv0aHR0dypcvn2eNocTERFasWJHnedLS0nj48CGqqqqoqqqipqaW6+ec7xoaGujp6cmvJZPJkMlkSvf90Lm+xr7vButkMhnR0dEEBQVx//59ecDsxYsX8oDZ+8EtQJ71IpVKkclkqKurU7lyZRwcHPK8z0uUKEFiYqI88OXr6yuvG/V+Z0crKyusrKyUFuYWvn1+fn4cPXpUoQaehoYGPXv2xNzcPNf+MpmMNWvW5LnEzMrKCjc3NzZt2oSuri7u7
u4KmUY5wsPDOXXqFMePH+fixYukpaVhZWVFp06d6NSpE7Vq1frswerg4GA2btzItm3biImJoVChQsTGxmJubs6ECRPo168fFy9eZPny5Xh7e1OtWjWmTp1Kr169vrulvDKZjLVr1zJp0iRatWrF/v378w1Ou7q60qZNGyZPnszSpUvl21NTU2nVqhV+fn54eXlRsWLFrzH9r2bfvn306dOH6dOns3jx4oKezn9CQkICK1euZPny5aioqDBt2jQmTpyoEGSPiYlh7dq1ODs7k5iYyODBg5k+fTrlypUrwJkLgiAIgiAUHBEQ+0GkpqZy+PBheQ0syO7S2KlTJ6UZIsnJySxbtizP86WlpfH48WOkUimZmZlkZmYq/VlTU5OmTZsq7fjo5uaGv7+/0uPyKpb/NUgkEnlwTEVFReG7RCJR+ILsN8GZmZlkZGQglUqRSqVkZWWRlZWlsNxUU1MTR0fHPK/79u1btm/fTpUqVXJlfYnOjv8dCQkJrFmzRuljXEtLi4kTJ+Yquh4TE8OaNWvyPKeamhrdunXDw8ODESNGALBlyxaGDh2a5zGJiYlcuHCBEydOcPLkSWJiYihZsqS8KH+TJk0+2Kk1LzKZjEuXLrF27VpOnjyJhoYGqqqqJCUl0bJlSyZMmECTJk3YuXMnq1at4vnz57Rs2ZIpU6bQunXr7/Kxnp6ezujRo9m+fTuTJ09myZIl+db7CgoKok6dOtSuXZszZ87I983KyqJ37964uLhw6dIl7OzsvtZN+GqePn2KhYUFtra2eHl5FfR0/lMiIiJYtGgRGzdupHDhwsyePZvhw4cr/C4nJCSwYcMGVqxYQXR0NP3798fR0THfTEZBEARBEIT/IhEQ+0Hs37+fJ0+e5NqupqbG6NGj5UXk37V9+3ZevXql9Hz16tWjbdu2H7zuqVOnuHPnjtKxQoUKMX78eGQyGWlpaSQnJxMfH09SUhIJCQnEx8eTkJBAQkICiYmJJCUlkZiYSHJysvwrJSWF1NRUUlNTSUtLIy0tjfT0dNLT08nIyJB/5QSqcgJvWVlZ8u8ymUz+/Uv+OmhpaTF9+vQ8x/X09Jg4ceJ3XzRbyJ+Hhwdubm55jtvb21OzZk2FbXFxcTg7O+d5TEJCAitWrEAikVCsWDHevn1LZmYmtWrVYtq0aZQvX57y5cvn2W1OKpVy7do1+dLKkJAQDAwM+Omnn3BwcKBdu3YftWwxMTGRXbt2sW7dOnx9fTE0NCQxMRFVVVUGDBjA+PHjKVq0KOvWrWPDhg3ExcXRs2dPpkyZkus2f08iIiLo2rUrt27dYsuWLQwcODDf/RMTE7GzsyMlJYXbt28rPP9OmzaN5cuXc/jwYbp06fKlp14gsrKy0NbWRlNTk/j4+IKezn9ScHAwc+bM4a+//qJcuXLMnz+fXr16KXRXTkpKYuvWrSxdupTw8HB69uzJjBkzvpumFYIgCIIgCP+WCIj9AD6UXVKpUiUqVqwoDyblBJZiY2MJCQlRGiTKyMhQCEAp+zktLY26devmWyx+9erVxMTEfJbb+W7mloqKijyr690lkWpqaqirq6Ouro6amhoaGhoKX5qammhqaqKlpYW2trb8u6amJlKpVB60Cw8PJyIigqioKOLi4khJSfmoYNqIESMwNTVVOlarVi06dOjwWe4L4dt1+vRpvL298xx3d3fH39+fYsWKYWRkRJEiRTAyMsLExEThzey7vL29efr0Kb179yYtLQ1/f3/Onj1LWlqawn6FCxeWB8fe/zI1NUVFRQWZTMb9+/flwTEfHx/U1dVp0aIFDg4O2Nvb53oMP3v2jA0bNhAQEICJiQlZWVn4+PgQGRnJmDFjGD58OFFRUaxYsYI///wTNTU1hg8fzsSJEylTpsy/v1MLkI+PDw4ODqSlpXHs2LEPZnTJZDK6d+/O+fPnuXHjBlZWVvKx9evXM3bsWJydnZkwYcKXnnqBsrKywtfXl9DQ0DyfE4V/79GjR8ycORMXF
xesra1ZtGgR7dq1U8jCTE1NZceOHTg5OfHixQu6dOnCrFmzvusgtSAIgiAIwscQAbEfwJMnT9i/f3+e40+fPmXv3r1Kx0xMTGjSpAlly5YlKytLXktLKpXKg0eampoKwaR3fzYzM8t32ZWvry+qqqrywFPOl46OjvxLV1dX/qWvr4+enh76+vro6+ujo6ODhobGv15ilZGRIe9GGRwczLNnz3j8+DHPnj0jLCyM2NhYpQEvVVVVtLW10dDQIDU1leTk5HyvU65cOfr3758rsKGjo8Pw4cPzzOARvrzo6Ghu3rxJWFgYWlpaVKtWjapVq+YZhPqnbty4wfnz5/McP3LkCA8fPpQ3bihcuLA8SNu6detcNeji4+PZtm2bPNNGR0eHatWqYWZmxt27dwkMDERTU5POnTujrq5OdHQ0ERERvHr1itevX8vPo6mpSbly5XIFyrS0tPDx8eH06dO4u7uTmZlJvXr1sLe3x9jYmOPHj3Pv3j0GDhyItra2wtzMzc0pV64cK1as4MSJE5iYmDB+/HhGjhypNCv1e3PkyBEGDBhA5cqVOX78uEKHv7wsWLCA2bNnc+zYMTp16iTf7uLiQufOnRk/fjyrVq36grP+NgwbNozt27dz4sQJ7O3tC3o6/3menp5Mnz6da9eu0bhxY5ycnHIFb9PT09m9ezeLFi0iMDCQn376iVmzZmFra1tAsxYEQRAEQfiyREDsBxAWFsbWrVvzHC9VqhS1atXKM7iloaHxj4MCV65c4erVq0rHTE1NGT58+D8676dKS0vj5cuXBAcHy4NegYGBPH36lJCQEKKiovLM8FJXV8fExITSpUtTuHBhJBIJkZGR+Pv7Ex8fj0QiQVVVNVdNKC0tLVJTUwFQUVFBR0eHv/76i5o1a3L58mVevHiBiooKlSpVomXLlgpdwYSv68WLF+zevTtXgwRLS0u6dev2WYNiycnJrFmzJlf2FmTX9TMxMeHgwYPcvHmTyMhI+VjhwoVp1qwZtWvXJisrCxUVFXn2WEJCAk+fPmX37t3yZc76+vqYmpry5s0b4uLi8pzP+4HlnEzIhIQEMjMz5fsVLVqUUqVKoaamJm8qIZPJUFFRYciQIXl2rDt58iRJSUlMmTKFfv365aqP9j3Kyspi/vz5zJkzhx49erBjxw50dHRy7ZeWlkZwcDAZGRmULl0ad3d37O3t+f3335kzZ458v1u3btG0aVPat2/PwYMHP3sQ9lv0119/MXDgQKZOnarQUED4cmQyGWfPnsXR0ZEHDx7g4ODAwoULFbIUIXsJ9YEDB1i4cCF+fn60bNmSWbNm0aRJkwKa+ecRGxtLXFwcRkZG6OvrF/R0BEEQBEH4BoiA2A9AJpOxefNmwsPDlY4PGTLkozIb/om0tDT++OMPIiIiFLarq6szYMCAz9b2PTU1lRcvXhAcHKwQ9AoKCiIoKIjIyEiFgJeqqqrCm30NDQ1KlSpFxYoVqVatGpUrV0ZPT4/IyEgeP36Ml5cXDx8+JCsrCx0dHXntm3fPAVCkSBFKlCjB48eP5d0lMzIyaNmyJX/++afC0iCpVCpf2ikUHJlMxrp164iOjlY63qVLF6pVq/ZZr/n8+XMOHjwoD5hCdg253r17KzxGEhMTOXToEAcPHuTWrVsKczQyMqJOnTp0796dDh06YGJiAmQvXxw0aBDXr18HspfijhkzhtGjR5OamsqQIUMYOHAg0dHRREVFKf2e83NUVJTCHJUxNDRk4sSJeY7r6enxyy+//Gce50lJSQwaNIjDhw8zf/58Zs6cqTRD9c6dO1y4cIH09HQge0n3gwcPyMrK4vDhw/L7IygoCFtbWypUqICbm1uuLLv/qsePH1O1alXq1q3LzZs3C3o6P5SsrCz27dvH7NmzCQ4OZsCAAcydOzfX8uWsrCyOHj3KggULuH//Po0aNWL27Nm0bNnyu2p8kZCQgIuLC8+ePZNvs7KyokOHDmhpaX2268hkMpKSkuSZ44IgC
IIgfPtEQOwHERERwa5du0hMTFTY3rx5cxo1avRFr52amoqnpyd+fn5kZGRQpkwZGjZsSNGiRT/6HMnJyQoBr3eDXsHBwbx580a+r0QikS/TfDcLR0tLi7Jly1KlShV53bSKFStSoUIFDA0NuXfvHp6envKvnABimTJl0NfX5/Xr10RFRcnPp6KiQlZWFoaGhnTo0IHo6GjOnj0LZDcrMDQ0JDY2lqVLlzJ+/Pj/TEDgv+ZDGZTq6upYWFhgampKyZIlKVKkCDo6Op/8hjCnaUPO4yAtLQ1fX195xoKlpSXq6ur5niMqKopDhw5x6NAhvL29FQqSGxkZUbduXbp3707r1q1JTk5m8ODBeHp6AlCtWjUiIiIIDw+nadOmXLx4kYyMDHx9fUlOTqZ48eKULFmSqKgoIiMj5fteunQJNzc3QkNDFTqr5jAxMWHUqFF5zrl48eL8/PPPn3RffatevHiBg4MDAQEB7Nq1i86dOyvdz9/fnwMHDigds7a2li+VjIqKon79+shkMjw9PTE2Nv5SU//mSKVSeX3GhISE7yrA8l+Rnp7O1q1bmTdvHrGxsYwePZoZM2bk+tssk8k4efIk8+fPx9vbm3r16jFr1ix++umnb/7/LTMzk61btyr9QDCnhMHnuA3+/v64ubnx9u1bIPt1Q5s2bShRosS/PrcgCIIgCF+OCIj9QFJTU3nw4AGvX79GW1ub6tWrU7x48c92/qSkJGQyGbq6up/8AjMxMZGQkBCFINe7Qa93M8xUVFTQ1dVFVVWVjIwMkpKS5GPa2tpUrFhRHvCqUKGC/Hvx4sXl83rz5g1eXl5cv34dT09P7ty5Q3p6Ojo6OtSsWRNjY2Nev37Nw4cPSUlJkZ8/ZxmktrY2nTt3pmnTply9epV9+/YhkUjIysqibt263Lt3j0qVKrFnzx6qV6/+L+9Z4UsKCgpi165deY6HhISwY8eOXNtzGjVoaGigpaWFjo4Oenp6GBgYUKhQIXlBfGNjY3R0dEhNTSUzMxN9fX2qVauGjY0NBgYG+QbB3g+ive/ly5ccPnyYo0ePcvfuXYUadoaGhtSrV49q1apx6tQp/P39gexaYWlpaVhbW2Nvb6/Q1TQhIYHdu3fnmU367m3P6c6qpqbG5MmT88yIqF27Nj/99FO+5/seXL9+nS5duqCtrY2Li0u+v9fbtm0jNDRU6ZiKigqTJk1CRUWFli1bEhAQgJeXF+XLl/9SU/9mWVpa4u/vT3Bw8HffXOF7lpiYiLOzM8uWLSMrK4spU6YwadKkXMsKZTIZFy5cYP78+Vy/fp0aNWowa9YsOnfu/M1+4OPn58fBgwfzHB86dOi/zlTPKwCuoaHBiBEjRDkEQRAEQfiGiYDYDy4tLY0XL14gk8kwMzP7R8sHQkJCuHDhAmFhYUB2xkirVq0U3uAlJCQozezK+TnnU1XIfrNdqFAhtLS0kMlkJCcnK9RA0tHRkWd35Rf0ypGZmcmjR48Usr+CgoIAMDMzw87OjtKlS5OQkICHhwe+vr7yDBgVFRV5phdA69at6du3LxYWFjg7O7Nv3z7U1NTIyMigWbNmpKWl4enpyYQJE3BycvqsyzGELyM5OZmVK1fmWv6aw8TEBDU1Nd68eUNkZKR8WWFsbCzx8fEkJiaSnJxMSkoKaWlpCnXIVFVVGTx4sNI3XJcvX+bq1auoqqrKi+br6uqip6dHiRIlsLKyktesk0gkGBgYYGBggEwmIzMzk4yMDFJSUuT1vmJiYnj+/DnPnj0jIiJCaY2yHIULF2bs2LEKwbAcmZmZbN++nbCwMLS1tUlJSZEvAUpOTiYrKwvIzoLMqZvXsGFDWrZsmetc6urq/Pzzz9/9G8IdO3bw888/Y2dnx+HDh/PNbpXJZMybNy/f8/Xp04cZM2Zw+vRpLl++TL169T73lL8LgwcPZufOnRw+fJiuXbsW9HR+eG/fvsXJyYl169ZhYGDArFmz+Pnnn3PV/ZPJZFy9epUFCxbg5
uZGlSpVmDlzJj179lT6nFKQXF1d5cvHlWnTps2/ahogk8nYuHGjQr3Hd9WoUQMHB4d/fH5BEARBEL4stQ/vIvxX3bx5k0uXLslr3Kirq9O0aVPq16//0ecIDQ1l165dCsGE8PBwdu3aRVBQEI8ePSIkJESh9pG6ujrGxsbo6emhqqqKvr4+MplMvhwxMzOTtLQ0zMzMFIJd+QW93hUbG8vNmzflwa8bN26QmJiImpoaNjY22NvbY21tTUpKCp6enpw6dYrY2FgkEgkymQxtbW2KFy/OmzdvSElJoXz58vTr14+ePXsSERHBggUL6N+/P9ra2shkMqysrOjcuTNr1qxBXV2dc+fO0aZNm0/97xAKiI6ODrVq1eLWrVu5xjQ1NenVq9cndf/MqSMTExODj48Pd+/eVbpfkyZNSExMJDw8nMTERPkxhQsXxt7ePlfGVUxMDGvWrCE4ODjPa6uoqKCqqoqqqio6OjrIZDIyMjJyNXywsbHJ842rqqoqPXv2xNnZWV4/LDMzk/T0dCpUqEBiYiJhYWFIpVJ5sNjDwwNVVVXq168vf/NctGhROnbs+F0Hw6RSKdOmTWPVqlWMGDGCtWvX5ts1N4eGhob8eVWZrVu3cuTIEY4ePfrDBsMAGjVqxM6dO/H09BQBsW+AsbExy5cvZ/z48cydO5dffvmFVatWMW/ePPr06SN/zpBIJDRt2pSmTZvi5eXFggUL6Nu3L3PmzMHR0ZF+/fp9cPn31/KhWl7/ttZXSkpKnsEwyP7AUBAEQRCEb5cIiP2gHj9+zLlz5xS2ZWRkcPHiRfT09BSWA8lkMmJiYpRmdpUoUULpskuJRIKhoSEaGhryN9FRUVGEh4eTkZHB69ev0dXVpWLFilStWvWTg17vzu3Zs2cK2V85Be2NjY2pX78+s2bNws7ODi0tLS5dusSJEydYs2YNWVlZ8iCYiYkJpUqVIiQkhLdv36KiosK0adPo06cPlSpVwsfHh9GjR3P06FH09fVRUVHB2NiY3377jZs3b/L777/j4ODA1q1bP6k2mvBtaN26NRKJBG9vb3lwt2jRotjb239SMAyyH/t6enro6elx48aNPPdTUVEhPT1d3hUyR9OmTZW+SVNVVaVNmzZs2bIlz46oWVlZZGVl5eqW+T4jI6N8xwsVKkTp0qXJyMigefPmVKtWjR07dvDkyRMALCwskEqlPH/+HFVVVWrWrIm7uzsWFhbMnj0bDQ0NihYt+s3XF8pPTEwMvXr1ws3NjbVr1zJmzJiPuj0SiQQrKyvu3bundFxFRYXly5ezZs0aeS2xH5WNjQ1Anp2IhYJhZmbG9u3bmTJlCjNnzmTAgAEsXbqURYsW0aFDB4XfAzs7O06fPs2dO3dYsGABQ4YMYe7cuUyfPp3BgwcXeFdZKysr3NzclD5n5tSH/Dc+lBGnpiZeZguCIAjCt0wsmfxBbdmyhdevXysdU1FRISIiQiEAlpCQIB/X1tambNmylC1bljp16uRbO2TlypWYm5v/o0wvZVJSUvD29lYIgL19+1b+JrR+/fryLxMTE9zc3Dhz5gwnT54kPDxcXggfoGrVqpQsWZJnz54RGBhI0aJF6dmzJ/369aNu3bpIJBLu3LnD/PnzOXHiBEZGRqSkpKCuro6joyMNGzZk6NChvHr1CmdnZ4YNG/ZdBwB+ZImJiUilUjQ0NIiIiEBLSwsTE5N//f958OBB/Pz88hw/c+YM/v7+8m6jKioqDBs2LN+ltsuWLVOom/dPtGzZkoYNG+a7T7t27fD19eW3337j5cuXQPbyn27durFkyRKkUimZmZk4Ojoyd+5cVFRUePbsGeXKlftXc/sWPHnyBHt7eyIjIzl06BAtWrT4pOPj4+PZvn27QtMDyA6W7d69G3t7e5YvX/45p/xdyqnbqK6uTnJysnj+/EbdvHmT6dOnc+XKFRo0aICTk1Oezx8PHz5k4cKFHDx4kBIlSjBt2jSGDx+Ojo7OV571/928eTPXB
4AymYxu3bpRtWrVf33+3bt3ExgYqHSsUaNGNG/e/F9fQxAEQRCEL0MExH5Q8+bNyzPLBODw4cOUKVOGsmXLyr/n/Pxu5seSJUvky6rep6KiwowZM/5VTZHQ0FCF4Nfdu3eRSqXo6elha2srD37Vq1ePQoUK4efnx5kzZzh79izu7u7yIEd6erp8SaiJiQkBAQHcvHkTHR0dOnXqRL9+/WjZsqV8mcfNmzeZP38+p0+fxsTEBKlUSlxcHCNHjmTmzJn88ccf/P7771hbW7N3714qVar0j2+jUHBCQ0M5d+6cPEvL0NCQZs2afbZGCN7e3pw+fVrpmEQiwcLCgpiYGEJDQ3n16hVhYWHUqVMn3+VGy5cvz9Ut9n0qKirygvzKGBkZMXbs2HyD2X/99Ze81l6tWrVYvnw5x48fZ/Xq1RQpUoS0tDQOHTrEiBEjiIyMpEuXLuzZsyffeX0Pzp8/T8+ePTE1NcXFxYUKFSr8o/MkJiZy7do1/P395c9Zy5Yto3bt2uzfv/+bLUL+tVWqVImAgAACAgL+8X0tfHkymYyLFy8yffp07t27x08//cSiRYvyfK588uQJixYtYs+ePRQpUoTJkyczatSoXIX6P5fMzEwCAgKIjIxEX18fS0tLhey0V69ecefOHeLi4njy5An79+/n7t27FCpU6F9fOyIigh07duR6LVSsWDEGDx4saokKgiAIwjdMBMR+UMuXL88zy0RLS4tp06Z91Kf1J06cwMfHR+mYpaUlPXr0+Og5ZWRk8ODBA4UA2IsXLwAwNzdXyP6qWrUqqqqqJCUlcenSJc6ePcvp06d58eKFvPtfWloahQsX5qeffqJ48eL4+flx4cIFsrKyaNWqFX379qVTp07o6enJ5+Dp6cm8efM4f/48pUuXRk1NjefPn9O5c2ecnJzQ1NSkf//+eHh4MH36dObMmfNRNYWEb09kZCRbt25Vurywc+fOHx0US0pKIiIigvDwcF68eEFISAgvX74kLCyMt2/fUrNmTaXLLq9fv87Fixdzbe/evTtWVlZKr5WamkpiYiJ6enokJiYSGRnJq1evePLkibwxhYaGBpmZmXk2CciRU+xZ2e+5r68vBw8epHbt2ixfvpxy5crRs2dP7ty5Q7FixUhKSuLChQscOHCANWvWkJGRgY+PD9bW1h9xj32bZDIZq1atYurUqbRt25a9e/d+ljfLAM+ePcPOzg4LCwtcXV3FG+R3DBw4kL/++ot9+/bRq1evgp6O8AFZWVkcOnSIWbNmERgYSN++fZk3b16emaFBQUE4OTmxc+dO9PX1+eWXXxg7duwnL0XPT3R0NHv37pXXIYXs1zHdu3fH3Nw81/5hYWGUK1dOvrTzc4iNjWXTpk2EhoZSpUoVLC0tsbOz+6JLRnNqO2poaHxzzQwEQRAE4XshAmI/qAsXLuDl5aV0rF69erRt2/ajzhMfH8+2bdsUllRCdqHyIUOG5FtQOzo6Gi8vL3nw69atWyQnJ6OhoUHt2rXlwS87OzuFOmUBAQGcOXOGM2fOcOXKFdLT09HV1SUtLQ2pVIq5uTkODg6ULl2a+/fvc/ToURISEqhbty59+/alZ8+emJiYKMzF3d2defPm4ebmRoUKFShUqBB37tyhbt26LF++nEaNGrFv3z5GjRpFoUKF2LVrF40bN/6o+0j4Nh07dowHDx4oHdPV1cXCwkIhuBUREcHbt2+JiYkhPj6e5ORk0tLS5Etw86Kjo0ObNm2wsrJCTU2NlJQU3rx5Q1ZWFiVLlsTU1BRjY2OMjIwwMjJCRUWF8+fPKw3UvXnzhosXLxIUFIRMJkNVVRWJRCIvmp9TE08ZZWNlypShW7duClkbjx8/JigoiKVLl9KsWTPOnTtHv3790NbWRk1NjdTUVC5evEhmZia1atWiaNGi2NjYcObMmXzvh29ZWloaI0eOZOfOnUybNo1FixZ9tjeYb9++xc7ODhUVFTw9Pb/rJgNfwubNmxk5ciTjxo1jz
Zo1BT0d4SNlZGSwfft25s6dS1RUlDx7+v2/rTlevHjBsmXL2Lp1K5qamowfP56ff/6ZrKwsdHV1//HvhUwmY9OmTUREROQa09DQYNy4cQofeuUYMWIELi4uPH/+/F8X1s8xatQoPDw8ePjw4Wc5nzIZGRk8fvwYb29vQkND5duLFStGrVq1qF69ugi4C4IgCMInEAGxH1RaWhq7d+/OVdDb1NSU/v37f9ILqoSEBDw8PAgICEAmk1G+fHkaNmyo8AlwVlYWT548Ucj+8vf3B8DExIQGDRpQp04dNDQ0SEhIQEtLi+rVq9O0aVMgu+hyThAsMDAQVVVVDAwMiI2NRSaTUbduXRwcHLCwsOD69evs37+f169fU6FCBfr27Uvfvn2pWLGiwrxlMhlXrlxh7ty5XL16FUtLS0qXLo2bmxulS5fGycmJHj16EB8fz5gxY9izZw+9e/dmw4YNn/XTbaFgrFq1KleNp3c5OzsTGxursE1dXR1NTU10dXXR19encOHCGBsbY2JigqmpKaVLl6ZMmTIUK1ZMHuAyMDBAIpHIu6dqa2vnmX2ZlZVFQEAAHh4eBAUFoa6ujkQiISwsjEuXLhEUFCQPwBUvXhxdXV1CQ0PzXLYM2cG997NBDQwM5LddIpFQsmRJdHV1ycjIICgoiA4dOnD8+HF+//13Fi5cSLNmzXj27BkSiQRXV1d5xmZ4eDghISFcvXr1uw0Qh4eH06VLF+7cucPWrVvp37//Zzt3SkoKLVq0IDAwEC8vL6XZKj+627dvU7duXWrUqJFnIwLh25WUlMTatWtxcnJCKpUyadIkpkyZgoGBgdL9X79+zYoVKwgICMDa2loeeC5VqhQODg4YGxt/0vVDQkLYuXNnnuOtWrVS2jn72bNnWFhYsG7dOkaNGvVJ12TQIPjzz7zHX72CkiVh0SJwcYHAQEhIgNKl4aefYOZMeL/5zrNnMH06uLlBWhrY2MD8+dCsmXyX58+fc+TIEcrcvImdlxfGb9+SpaJCRLFieDZoQEClSmhoaGBvb4+Vqir88QdcuJB9fT297HPOnQu1aytee86c7O3v09SE9/+2hIdnz/P06ezbZGkJjo7Qvfsn3YWCIAiC8K0QAbEfWGZmJg8fPuTZs2cAlC9fnmrVqn2WrkhJSUncvn2b69ev4+npiZeXFzExMaioqFC9enWF5Y9ly5bF29tbaYZJUlISmzZtIiEhAQMDA9TU1IiOjkZDQ4NWrVrh4OCAtbU1Fy9eZM+ePfj5+VG0aFF69epF37595cXx3yWTyXB1dWXevHl4eHhQo0YNrKysOHHiBGpqasyePZsxY8agqanJtWvX6N+/PzExMWzYsIG+ffv+6/tG+DasXbuW6OjoPMeLFCmCubk55cuXx9TUFD09vc9e9DssLEyeHXn16lUePnxISkqKfFxdXR0VFRVKlCiBra2tPOvx2rVrCvvl0NHRQVdXl8jISIWg17vatGnDhQsXFLLFtLS0SE1N5ejRo0yePJnnz59jY2ODj48P48ePZ//+/ejr6+Pq6oqZmRnr169n7NixVK9eHR0dHTw9Pb/Lguj37t3D3t6ezMxMjh07Rr169T7buTMzM+nevTvnzp3j6tWr1KlT57Od+78kNTUVXV1d1NXVSUpKEku/vlPR0dEsWbKENWvWoKury8yZMxk1apTSD9dcXFyUBj91dHQYO3bsJ2Vs3bt3DxcXlzzHa9WqRYcOHZSO9erVi1u3bvH06dNPe93j5ZUdZHqHLCuL5IEDSS5alKI52Wpdu2YHvipXJktXlygPDwwPHSK1UCGeHjhAdTu77HqRL19mB6tUVWH8eNDVhR074PHj7ABZ48Y8e/aMffv2UdvLi3Znz/K0YkWeVqqEmlRKDR8fioeHc6BHD/yrVAFgVGAgxU6ezJ5D3boQFwebN0NwMJw7By1b/n/yOQGxjRuzA2c5VFWhd+///zs+HmrVyg6KTZgAxYvDwYPg7g579kCfPh9/HwqCI
AjCN0IExAS5+Ph4UlNTKVKkyCe9KZHJZLx8+VIh+8vHx4fMzEwMDAyws7OTB7/q1q2b65Pj9PR0Vq5cSVpamtLzu7q64uHhQeHChenYsSMODg7UqlWLs2fPsnv3bq5fv46Ojg6dO3emb9++CsXx35/nuXPnmDdvHjdu3KB27do0bNiQQ4cOERERwbhx45g5cyZGRkZkZGQwd+5cFi9eTP369dm1axdly5b9pPtT+La5urpy/fp1pWOlSpVi6NChn/V6cXFx3LlzhytXrnD58mVevXpF48aN5VlDKSkp3Lx5k9DQUGxtbbG1taVmzZpkZGQwb9483N3dlS6jLFmyJM2aNePBgwc8ePBAnhFWpEgRhZo6mpqamJmZERAQIN+mpaXF/v37Wbt2LW5ubuzevZvk5GRGjBiBiooKGzduZMaMGZiamnLx4kVMTEwICwvD0tKSpk2b4uLiwvHjx3FwcPis99XXcOjQIQYOHIiVlRXHjx+nZMmSn/X8EydOZO3atRw7dgx7e/vPeu7/mgoVKhAYGIivry+WlpYFPR3hXwgNDWXevHls374dU1NT5s6dS//+/eUBp/j4eJydnfNc2h0fH8/YsWM/ulttcHAwf+aTrdWyZUsaNGigdMzHx4eaNWuye/fuf/1hV7SLC0YODjzu0wer95qLSKVS9u7dy/Pnz7H09aXHwYMc7tqViObNGThwILrTpsGWLfDoEVhYZB+UnAyVK0PRosRfvsz69etJT09n7Jo1pGppsW34cPj7QwiN1FQmrVzJ83LlOPB3AMv0zRvsp0zBpHz5/08kKio7o6tSJfDw+P/2nIBYZCTkl6G3bBlMm5YdpMvpnJmVBba22UG9kBAQNVUFQRCE78y/TwUSvnvh4eGcPn2aly9fAqCtrU3Dhg2xs7NTmvWRnp6Oj4+PQgAsNDQUdXV1mjVrRuPGjenTpw8NGzakdu3aeXZTe/nypbwb5PvLGd/VpEkTFixYgI2NDWfPnmXnzp306tVLXhx/9+7dODg4KK0TAtmBsFOnTjFv3jy8vb2xs7Nj0aJF7N+/H2dnZ3r06MHixYvlgYmAgAD69u3L3bt3mTdvHtOnTxdZC/9BDRo0wN/fXyFoBNlZWR9bQy8vaWlp+Pj44ObmhpubGw8ePJAXvYfsZYyjRo1SeMxqa2vTtGlT7OzsiIuLY968efj6+iqtUVa9enUGDRpE0aJFOX78OOrq6jRv3pxq1aoRFxeHl5eXwu3S1tYmJSWF8PBwhfOcOnWKFi1acPToUSC7u6yLiwvFihUjIiKC8ePHY21tzdmzZzEyMgKyAz1aWlqkp6djaWlJx44d/9V99bVlZWUxZ84c5s+fT+/evdm+fftnqyGUw9nZmdWrV7N+/XoRDPsItra2BAYGcvv2bREQ+86VLFmSzZs3M2nSJGbPns2QIUNYtmwZixYtwsHBgdevX+fb4frNmzdUrFiR/v37M2PGjHxfGwDyzteRkZG5xlRVVfNt9FGjRg3atWuHk5MTvXv3/ledX5O3b8cQ0BoyJNeYl5cXz58/ByD273ILWqmpREZG4urqisO1a1Cz5v+DYQA6OmBvD+vX8+jYMdLT0wHQTEsjqkgReTAMIF1Li3QNDaTvZLmFFS+O54MHdH43IFakCDRqBFeuKL8RMll2Fpi+vsL55a5dy854ywmGAaioQI8eMHUqXL0KrVrldzcJgiAIwjdH9H3/wcXHx/Pnn3/Kg2GQnaly8eJFefZMZGQkLi4uTJ8+ncaNG1OoUCHq1auHo6Mjb968oV+/fhw4cAAnJyfq169PoUKFSEhI4Pz589y/f19+3oyMDK5evcqvv/5KtWrVMDMzY/To0bkK8r/PxMSEnTt3UrJkSXr27El4eDjLly8nNDSUs2fP0rdvX6XBsKysLI4dO0atWrWwt7dHW1ubLVu2UKhQIWbMmIG+vj5eXl4cOHAAc3NzZDIZ27Zto0aNGsTExODp6cnMmTNFMOw/Sltbm6FDh9KgQ
QOMjIzQ09OjSJEiGBkZceXKFe7fv//BgvmQ/Tjz9fVl+fLltG3bFlNTU7S1tbG1tWXmzJlcunSJhIQEqlSpwpAhQ9i9ezc7d+7MM4B77do1Bg0axKNHj+TXl0gkNG7cmE2bNvHmzRtWr17NkSNHOHjwINWqVaNMmTIYGhpSsWJFateunetNZE7Nu5wMM4lEQuvWrWnRogWAvJvi8ePHcXR0ZPPmzUB28Pvo0aPyYNiZM2c4dOgQkydP5ty5c0ybNu1fvYn82hITE+nWrRsLFixg0aJF7Nmz57MHw44cOcKkSZOYNm0ao0eP/qzn/q+ytbUF4ObNmwU8E+FzsbCw4ODBg9y+fZuSJUvSuXNn6tevL68dmpfu3buzbNkyzp8/T+XKlenTpw+PHz/Oc3+JRELPnj0pXLiwwnapVMqxY8eUFtt/l6OjI48ePeL06dMff+Pel5FBETc3vCQSyjRpojgmkxHg5YVuQgJmISG0PXuWLImE4L8zzh89eoQsLQ2UPQ/p6AAQdeGCfFNw2bJUePaMujdvUigmhiKRkbQ/fRqt1FRu/v17lOPx48e5u4m/eZN3Fpi5ORQqlB0Q69cve2nkuz4wT+7cUX5eQRAEQfiGiSWTP7j8uk1mZmZy6NAh+QtYU1NTGjRoIF/+WKNGDTQ0NJDJZGzevDlX9glkv1g1MTHh4sWLXLx4kfj4eIoVK0a7du1o3749rVq1QldXN98lk6dOnSI2NjbP4vjvy8rK4ujRo8yfP58HDx7QrFkzRo8ezblz59ixYwfm5uYsWbKEzp07yzPg3r59y/Dhwzl+/DjDhg1j1apVeQYshP+ekJAQ9uzZk2tJoqWlJd26dVMI+oSEhHD8+HFcXV25f/8+YWFhZGZmyse1tbUpX7489erVo02bNtStWxczMzOFbMsdO3bw4sWLPOezd+9eAgMDad26Nb169aJjx44ULlwYd3d3Zs2axbVr1zA3N2fAgAFKj8/MzGTVqlUUKlSI0NBQLCwsaN26NWvXrpXvc+/ePWrUqMGtW7do27YtMTEx9OrVi549e9KzZ08sLCx4+PAhHTt2xMXFhaSkJKysrKhUqRIlSpTg0qVLBAYGovGdLJEJDg7GwcGBoKAg9uzZ80Uytzw9PWnRogUODg7s3bv3uwoWFiRPT08aNGhA9erVFT5EEf47XF1dcXR05O7du/z6669oamoq3c/f3x8nJydMTEz4448/cHJy4uXLl3Tp0oVZs2ZRs2ZNpcdlZmby5MkTIiIiMDAwoHjx4jRp0gQ1NTW8vLzybYTTsGFDsrKyuH79+j+rhXjqFHTsyAJTU2a90/kRyA5AlSgh/2ecgQEXWrfGt2pV+bZZt2+jev06vHiRHYzKUb8+eHlxoXVrvP5uDKCTmEjXI0cw/zvjDCBJR4f9vXvzqnTpXFPr2rUrVXOude0aNGkCs2bBvHn/32n16uyi/nZ22YX0r12D9euhXDnw9oacMhfjx2dvDwqCMmX+f3zv3rB/P4wdC+/8jREEQRCE74FYMvmDezcz7H2qqqq0a9eO33//nfr161O6dGmlLxZDQ0OVBsMge7nisWPHiIuLY8qUKbRv356aNWvmeqPYsmVLpZ/QSqVSVq9enefyzXflBPDmz5+Pr68vrVq14ty5c3h6ejJw4EC0tbVxdnbm559/VngTf+HCBQYNGkRaWhpHjx6lc+fO+V5H+G+RyWS4uLgorc/l5+fHqlWruHbtGvfv3yc0NFRhP11dXSwtLalXrx7t2rWjYcOGmJiYfPCaH3osT58+ne7du8uDstevX2fmzJlcvXoVVVVVVFRU6NSpU57Hq6qqYmFhQXx8PLt376Zjx47yJcGqqqr06tULa2tr1q1bx6RJkzAzMyMmJobIyEi6detGly5d2L17NzVq1ODkyZPcuXOHgwcPEh4ezl9//UWLFi1YunTpdxMMu3btGl26dJFnhVZ9583o5/L06VPs7e2pW7cuO3fuFMGwT2BtbY1EIsHX1
xepVPpZGrsI35aWLVvSokULjhw5wurVq2ncuHGu5w9jY2OuXbtGlSpV+P333/nll18YNmwYu3btYvHixdjY2NChQwdmzZqVqwGGqqoqVapUocrfReUh+8M0W1tbevTowenTp5XWFoXsLLEOHTrg7u5Ok/czvD7G3r1kSCSEKGvKYWSE2/TphIeEUOLNGyr7+aHx9/LH7GEjVEaPzu7a2LMnLFyYXVR/w4bsYBSg9s7fnAx1daKMjYk3MOBppUpopqdj6+VFjwMH2DF4MDFFiihcXp4hFhGRXfS+XLnsOmDvmjBB8d85hfj79s2ex/Tp2duHDYNNm7KXSK5aBSYm2UX1jx3LHlfS6EUQBEEQvnXiFfsPLq8XiDmmT59Or169cmW4vCs2Njbfc/Tr1w8vLy9mz55NrVq1FN4oRkVFsXHjRiZOnMjBgwd58+YNMpkMVVVV6tSpw8yZM6lfv36+AQSpVMru3buxsrKid+/elClTBnd3d7p168bAgQNZsmQJ48aNIzAwkHHjxslfhKempvLLL7/Qpk0bqlatysOHD0Uw7AcUFhaWb7fJO3fucOLECSIjI7GwsGDIkCEcPnyY2NhYEhMTefjwIdu2baNr1675BsNSU1M5cuQILVq0YOPGjXnup62tTf/+/dHT0+PGjRs0a9aMhg0bcu3aNdTV1Rk5ciTPnz+nUaNG+d6uAQMG4OvrS9++fZk7d65CTbGc3+tx48YxevRo1q9fD4CbmxsDBgxg3759aGhocOLECSQSCV27dsXV1ZUZM2Zw4sQJ9PX1GT58eL7X/1Zs27aNFi1aULVqVW7duvVFgmERERG0a9eOokWLcuzYMaWd9YS86erqUqZMGaRSab7L44Tvm0QioVu3bly+fFneMTcoKIiUlBTat2/P6NGj8fPzY8SIETg6OmJjY4O3tzdDhw7F39+fXbt2yTM927dvj7u7e77Xs7Cw4OjRo1y+fJnx48fnWbusffv2VK9encWLF3/6jUpMRHbiBG5qapSxsck9rqFB+Z9/JsDCAvcmTTjTvj0OLi5UfPIEyM5Ok7Rvn51Z5e6e3W3SwiI7QLZwIQDp7wQOux86hEFcHCc6d8bPygqfmjXZOWgQqpmZNL90KdflVVVVISkJOnSAhAQ4cUKxk2Re+vTJ7iLp6vr/bdWrw9692R02GzSAChVgzRpwds4eF1n1giAIwndIBMR+cO9+mvq+okWLUrRo0Q+eo8h7n0i+r8Q7ywUgu0bZwYMHsbe3p3jx4owbNw59fX1mzJjBsmXL+P3335k1axbt27fP941lRkYGO3fuxNLSkv79+1OxYkVu3rzJmDFjGDlyJD///DOtWrXiyZMnODk5yeskQXbdjrp167JhwwZWrVrFuXPnMDU1/eBtFf578lqqm6NmzZokJibKg1/bt2+na9euCo+nvKSnp3PixAlatWqFgYEB3bp149KlSzx9+pSUPD5Nb9euHffu3aNZs2bY2dlx9epVtLS0mDJlCi9fvmTdunWoqKiwZcuWfK/dpUsX1NTUCAwMxNnZGYlEgkQioVevXnTt2pWzZ89y6NAhnJ2duXjxIpBdEHvbtm3yunkVK1Zk9OjR9O3bF3t7e7KysjAwMMj3zeW3QiqVMn78eIYPH87QoUO5cOECxvl1UPuHkpOT6dixI0lJSZw5c0Zeb034NDl1xLz/zooR/rvU1NQYOXIke/bsoWrVqmzZsoUmTZowc+ZM+XJvb29vtLW1adCgASNGjCA6OhojIyN69erFqFGjqFu3LuvXr6dly5ZcvHgxz+ejZs2asWnTJjZt2sTq1auV7iORSJg+fTrnz5/n7t27n3Zjjh9HkpzMzowMrKyslO5StmxZtLW1SUxM5JWZGQl6etR4/JjWrVv/fwno2LHZNbs8PbMzw/z9s+t5QXYRfcAwOpqKz57x9N3i+0Cqjg4vzMwwU7IM30hPD7p0gQcPsoNhn/KBQOnS8P6HRd26QVgY3LoFXl7ZnSX/zj6mU
qWPP7cgCIIgfCNEQOwHV7NmTaXtzdXV1enQocNH1dMoXrw4ZmZmSsdUVVWxsbEhMzMTV1dXBg8ejImJibw4/sqVK3MVx//QNdPT09m+fTuVK1dm8ODBVK1alTt37jBv3jz50odixYrh7e3Nrl27KPNOrYusrCxWr15N7dq1kclk3L59m4kTJ4rlTT+w4sWL5/v/b21tja6u7kefTyqVcvr0aVq1aoW+vj6dOnXC1dUVXV1dBg0axPXr14mKimLevHk0bdqUIkWKoKOjQ4UKFbCzs2PChAnUrVuXK1euoKury2+//UZYWBhLliyRN5goV64cFy9ezNUhM0f58uUpWbIkAH369CErKwuZTIaamhpHjhxBW1ubO3fu0LVrV2bPns2KFSuA7CVL794X4eHhFC1aNNfSJolEwpEjRz76PvnaoqOjadeuHRs2bGDDhg1s3Ljxg9mw/0RmZiZ9+vTh0aNHnDp1SulzqfBx6tati0Qi4datWwU9FeEr0dbWZurUqQQFBTFx4kScnZ0xNzdn2bJlVK5cGS8vL9avX8+BAweYNGkSt2/fRiqVAtnPQVWqVKF69eq0bdsWOzs7Tp06pTQwNnToUKZOncqkSZM4deqU0rl0794dc3NznJycPu1G7NmDVFsbF8gzIBYfH8+yZcuIjY1l8ODB6KiqUrlECezs7BR31NXNruNVqxaoqmZnZ2lrk/l3sFjv7+WPEiXNXlQzM1F5b3shfX3K/f47uLllZ3Z9ynJQmQyCg7O7Sr5PQwPq1AFb2+yfc7LIWrb8+PMLgiAIwjdCRAF+cKqqqvTp04e2bdtSqlQpihYtSs2aNRkxYkSeQa735Sypej8TTFNTExsbGxYsWEDp0qVp1aoVHh4eTJo0iadPn3Lz5k3GjRv3UTWXIDuTZ/PmzVSqVIlhw4ZhY2ODj48Pa9aswdnZmVq1ahEWFsbJkye5dOkStWrVUjj+9evXtG/fnokTJzJy5Ehu375N9erVP+6OEv6zdHR0qF27ttIxTU3NXLVqlMnKyuLcuXO0adMGPT09OnTogKurKwYGBgwfPpw7d+4QHR3Njh07qF+/PioqKmhoaNCkSRPGjh1Lq1at2L59O23atOHy5cvUrFmT+fPns2bNGvmbpuTkZFq1asXgwYORSqXypcLvdsKUSCRUrlyZ8uXLc+PGDf766y9u3bolD+hlZGTIlzBXqFCBX375hQULFuDk5ISKigrx8fEKtyu/JUkhISG8efPmg/fN1+bn50e9evW4e/cuFy9eZNSoUV/kOjKZjIkTJ3Ly5EkOHjyY52NI+Dg2NjbIZDJ5d2Phx2FoaMiiRYsIDAykV69ezJgxg4oVK7Jjxw5GjBiBl5dXns10ChUqxK5du1BTU6Njx47Y2Nhw5MiRXB2CnZyccHBwoFevXkobN6ipqTFt2jQOHz7M06dPgezXDK6urpw5c4aHDx/Kg3FykZHg6kpA1apkamjI6zQC2csUk5MBWL58OYmJicyZMwez27dRjYtDpW7d/O8UT084ehSGDqVaw4YARBsZkSWRYPX4cXbA6m/6cXGYvXjB6/deg/W4dg3JwYPZdcC6dMn7WpGRubdt3Ji9vW3b/OcZEJBdV6xDB5EhJgiCIHyXRJdJ4aOlpKQQGBhIVlYWZcuWxSCn89DfZDIZz58/x9fXl3v37nHo0CEePnxI0aJF6dWrF/369aNOnTqf1MVJJpPx8uVLjh49yurVqwkJCaFnz57MnDmT0qVL4+TkhLOzMwYGBsybN4+hQ4cqLch84sQJhg0bhpqaGjt37qRNmzb/+v4Q/jtyMhhv374t7xippaWFgYEBJUqUoFatWpR+r4OXTCbj4sWLrFixAnd3d1JTUwEwMTGhS5cujBs3DktLy3yv+/DhQ8aPH8+VK1eA7OXHU6ZMybWMU0VFhX379uHn5yffVr9+fU6ePImRkRFxcXEkJCTw9OlTPDw8FLIk/P39OXLkCBkZGWzevJkRI0aQmZnJzz//z
Pbt29mwYQOjRo1CX18fbW1tIiIi5MdOmzYt3+w4hQ5m34AzZ87Iax66uLgovkH9zFasWMGUKVPYtGkTP//88xe7zo8iPj6eQoUKoaqqSlJSUp5dCIX/vmfPnjF79mz279+PhYUFkydPJiwsLM/9czr6XrlyhQULFnDp0iWsrKyYOXMmPXr0kC8BT0pKolGjRkRGRnLr1q1cH+KlpqZSrlw5fvrpJ3r16pUrOFusWDF5fUcA1q2DceNY3a4df4SGKgbafHygZUuSO3bkt717adi4MZ1KlYLdu6FUqexlkTnlJkJCsgvV29tn1+16/Dg7yFS5MqF799J7xAgqVqyImZkZHV1csLl7l+dly+JnaYlmejq1b99GPyGBPwcO5EXZsgA0f/CARkePZmecjR6d+07r3Dk7Iw1ARye7oH+1aqClBR4e2V0jra3h+vXs8RxVqkD37mBmBs+fZwfO9PWz9/s7K1kQBEEQviciICZ8lBs3buDm5qawXKFevXq0bt0aiURCVFQUBw8eZM+ePVy/fh0dHR06d+5M3759admy5T9arvT06VMOHDgg/6Q3KyuLatWqYW9vz9atW5kzZw5JSUlMmTKFqVOnov9uu/K/JSUlMWnSJLZs2YK9vT3btm37qLpowo8pJSWFK1euKF221b59e2rXrs3FixdZvnw5165dkwfBSpQoQbdu3Zg4ceJHBWEePXrE2LFjuXr1KpC9bHPhwoXY2tpy6NAhpcfEx8fj7OwsD+r27t1bYdzPz4+DBw8qPfb27dvyrLP09HT69+/PkSNH2LFjB/379wfA1NSUhIQEEhISADh//jybN2/G2to6z9sxePDgj84k/ZJkMhnLly/n119/pUOHDuzevTtXwP5zOnToED169GD69On/rBC3oJSZmRkvX77k9u3bIuNO4N69e8yYMYOXL1/SvXv3PPdr1KgRzZs3l//b09OTBQsWcPbsWSpWrMiMGTPo27cv6urqhIaGUrduXUxNTbl69So67wZ7gGXLlrFz50569Oih9FqVK1emZ8+e2f+ws4OgIBpXqECpMmXYu3fv/3d8+xZmzuTNoUPoxcSgq6GBpEwZ+OknmDkT3q1nGBMDgwfDzZvZNbtKloQePbhYty59fv4ZTU1Ndu3aRUBAAG9evaK2tzc1793D6O/6XmGmprg3aULw30u2CxcuzM83bqC5b1/ed+7z5/B38Izhw7Mz0l6+hNRUKFMmu9PkzJnZwa539e6dHfwKD8++Dfb2MHcuFCuW97UEQRAE4RsmAmLCB/n7+3PgwAGlY8WKFePs2bOcPXsWmUxG69at6du3Lw4ODv//FPUTJSUlsWHDBuLi4pQG0nx8fDhx4gSDBw9m3rx58lpJ7/P29qZv3768evWKVatWMXz48E/KThN+PLGxsaxZs0ZpHZqsrCw2btxI5N/LS0qWLEmPHj2YNGkSpUqV+qjz+/r6MmrUKPlSxNKlS7N06VK6d++Oqqoqhw8fzrfLnpeXF1u2bFEahNq5cychISFKj5NKpUyfPh0VFRW6d+/OhQsXOHDggEJXVUtLSwIDA0lPTyc5OZmqVatSo0aNPANixsbGjB49usB/p1JTUxkxYgS7du3C0dGRBQsWfNGagB4eHrRs2ZKuXbuya9cuUX/wM+revTuHDx9m48aNjBw5sqCnI3wCmUzGmzdvSE5OxsTE5B///VfG1dWVq1evKs3+Bjh16hS7d+/G4r1i897e3ixcuJDjx49TtmxZpk+fzqBBg3j8+DGNGjWiXbt2HDx4UOF3OCEhgTFjxlC+fPk85zNlyhR55qxMJsPIyIgpU6Ywc+ZMhf0CAwOpXLkyCxcuZNq0aR99e6VSKb/99huLFy+mXbt2/PXXXxgbG5Oens6lS5fw8fFR2gxGTU2NqlWr0rJly0+qeykIgiAIPzLlry4E4R1eXl55jgUGBhIREcHKlSvp2bMnxf7Fp4SJiYls2LCB5cuXU79+/f93X3qPlZUVs2fPxkZZi
3Oyl78tXbqU3377DWtra+7evZvrhbIgKPP48eM8u5WpqKhQq1YtrKysmDp16kfXvoPs7K0RI0bg4eEBgLm5OStXrsTe3l4hoJT8d82ZvCxZsiTPjKy3b9/meZyamhrh4eEMGzaMGzducPLkyVzLho2MjHjx4gVZWVksWLCAsLAwzp07x+vXr+VLOnPo6urSrVu3Ag+GvX79ms6dO3P//n327NlDnz59vuj1njx5goODA7a2tvzxxx8iGPaZ1alTh6NHj3L79m0REPuOvH79mmPHjsk/LJBIJNSoUYP27dvnGcT6FC1btqRYsWIcO3Ys11hUVBR37tzB0tKSQYMGsXXrVvkSydq1a3Ps2DEePHjAokWLGDVqFPPnz2fatGn88ccf9O7dm1mzZrFo0SL5+fT19T+Y5ZuYmCgPOL1584bY2FilBfVnzZqFiYkJ48aN++jb+urVK3r37o2XlxdOTk5MnTpV/jyjoaFB27Ztad68OQ8fPuTly5ekpaWhoaFB8eLFsba2zpXxJgiCIAhC/kRATPigd2sKvU9fX59Lly79qxdh8fHxrFu3jpUrVxIfH8+QIUOoUqUKMTExSvdXV1fPMyssJCSEAQMGcO3aNaZPn86cOXNydcgThLzkLIHMy7Rp02jWrNlHn8/Pz49hw4bh6ekJZC+3WbNmDS1btlQaTCpRogTPnz/P83x5Pe4BDAwMSPq7C5kygwYN4v79+1y4cIGGfxdphuxshCtXrtC8eXNat26Nk5MTd+7cwdHRkUqVKsm/Hj58SHJyMiVKlMDa2hotLa2PuQu+GG9vbzp16oRMJsPd3Z06dep80euFh4fTrl07ihcvzrFjx0SNqy/AxsaGrKwseeBY+PYlJiaya9cuUlJS5NtkMhn37t0DwN7e/oPnyMzMJCwsjKysLExNTZVmhlevXh1jY2Nu3LjBkydPePr0KV5eXjRp0oSLFy8yfPhwduzYwZEjR9i1a5fCdatXr87+/fuZM2cOixcvZtKkSRgbG9OuXTsWL15MpUqVGDRokHz/ypUr8+TJE6VzVVNTw9DQUP7vnIzeKlWqKOx39+5d9u/fz9atW9HW1v7gfQBw7tw5+vfvj6amJlevXqVBgwZK99PQ0KBWrVq5GgcJgiAIgvDpxMfbwgcpq82VQ01N7R+/MYyNjWXevHmUKVOGuXPn0qtXLwIDA9m0aVO+1wSUvhnft28f1tbWPH/+nMuXL7No0SIRDBM+SX4Bp/fHExMTiY2NzdXNDLKXRtrZ2VGlShU8PT2pVq0a7u7u+Pn50apVqzwzq/JrOmFhYYHxu3Vn3pNXRiVAaGgovr6+XLp0SSEYJpPJOHLkCNevX5dncmRkZFC/fn3Kly8vz5YrUaIErVu3plOnTtSrV6/Ag2H79++nUaNGlCxZktu3b3/xYFhSUhIdO3YkJSWFM2fOULhw4S96vR9VTtZvQEDAB7MlhW/D3bt3FYJh7/Lx8SExMTHf4x8/foyzszN//PEHO3fuZOXKlUprOEJ2ncMuXbrg6OjIxo0bGTFiBEeOHKFDhw50796duXPnkpKSgoODA/Xr1ycqKkrh+MqVK/Pnn3/y5MkTOnbsyIULF9DU1GTYsGGcPn1avl/jxo3zzBS2sbFReM3z+PFjNDU1cy2xdHR0pHLlygqBtrxIpVIcHR1p164ddevWxcfHJ89gmCAIgiAIn5cIiAkfVKNGjTzHqlevLl+e8K7Q0FD27dvHkiVLWLVqFefPn5e/aI6Ojub333+nbNmyLF68mAEDBhAUFMS6devknfzy61xXunRpChUqJP93XFwc/fr1o0+fPrRr144HDx7QpEmTf3hrhR9ZpUqV8lz2W7x4cSpUqMDr16/5448/WLFiBatXr2bNmjX4+PgA2cXy69ati5WVFTdu3MDGxoZbt27x4MEDGjVqlO+109PTGTt2LH/88QexsbEKYxYWFnTq1Cnf421sbJTWvYmNjcXNzY2rV6/myih4+fIl/v7+Ss8XG
BhIcHBwvtf82rKyspg5cya9e/ema9euXLlyBVNT0y96TalUSu/evfH19eX06dOUKVPmi17vR2ZkZISpqSkymUyxY5/wzcqv+6NMJiM0NDTP8efPn3P48GGFoFlqaipnz57lwYMH+V5XS0uLSZMmERgYyNSpU1m/fj2rVq3C0dGRxo0b4+XlRfHixZk3b16uY8uXL8/WrVsJDAxk8ODBZGVl0bFjR8aMGUNUVBSmpqY0bdo0V8ZwtWrVaNWqlcK2x48fU7lyZYXXQZcuXeLChQssXLjwg0tGX716RbNmzVi2bBlLlizh5MmT+X7wIQiCIAjC5yWK6gsflJmZyeHDh3O9cTY1NaV///65skWCgoLYs2dPrsyZwoULEx4eztq1a5FKpYwaNYopU6bkan2ec819+/YRGBiosF1bW5uBAwfK6zd5eHjQr18/YmJiWL9+PX379i3wukbC9y0hIYHjx48TFBQk31a+fHk6depEeno6W7ZsUVrQ2MfHh+PHjwNQr149tm7dSrVq1T7qmi9evKBJkyYEBwdTokQJzp07h76+PklJSZQoUYIiRYrke3xSUhKHDh1SKKqfkZHBo0ePePjwIRcuXFAaLLt8+bK8wL8yDRo0oGXLlh91G760hIQE+vXrx8mTJ+W1db7077pMJmPMmDFs2bKFkydP0q5duy96PQE6d+7MiRMnWL169SfVXhIKhouLi3x5pDK7du2iYsWKNG3alKZNm2JjYyNfErl79+5cf+NzGBsbM2bMmI+eR3h4OAsWLGDz5s0YGxvTrVs3du/eTUxMDKVKleL48eN5LjH08/OjQYMGxMbGoq2tzZgxY5g8eTKOjo48efKEYcOGMWXKFNzd3XPVCmvQoAFly5Zlz549QPZzRr169VBRUcHLyyvf56izZ8/KX0MdOHBAZIUJgiAIQgEQGWLCB6mqqtKjRw/69OlDrVq1qFmzJl26dGHIkCG5gmEymYyzZ88qXUYWExPDrVu3GDVqFMHBwaxYsUJpMCznmn369MHe3p4KFSpgZmZGgwYNGDlyJCYmJmRkZDBr1iyaNGlCqVKluH//Pv369RPBMOFf09fXp3///owdO5Z+/foxbtw4+vXrh56eHtevX1caDIPsQvkNGzbE39+fGzdufHQw7NChQ1SsWJHg4GC6du1KYGAg1atXp1y5clStWvWDwTCAw4cP5+owqa6uTrVq1Th16lSeHdM+9Pvyrfw+PX/+nPr163P58mVcXFyYNm3aV5nb8uXL2bhxIxs3bhTBsK+kdu3aqKio5LlsTvi2VK9ePc8xXV1dhg4dikwmY/78+dja2mJkZETbtm1xcnLixYsXeR779u1bMjIyPnoeJiYmrF27Fn9/f5o3b866desoXLgwrVu3JiwsjNq1a9O1a1elz9+WlpbcunULQ0NDihYtysaNGylXrhxSqRQvLy9SU1NJS0vDxcVF4TiZTIavr69CkOzIkSPcvn0bJyenPJ+jcpZItm/fnnr16oklkoIgCIJQgESGmPBZRUdHs3bt2jzHixYtyujRo//VNQICAujbty93795lzpw5TJ8+/bN0shKED1m7di3R0dF5jo8ZM+ajl7tkZmYydOhQ/vzzT9TV1dm+fTv9+/f/5Dm9fv2aLVu25Dlev379XMt8coSGhrJt27Y8jx08eHCeXS2/BH9/f27evMnbt28pVKgQtWrVIjY2lu7du2NoaIiLi0uu4tVfyv79++nduzczZ85kwYIFX+WaQnbWTPv27SlfvjzPnj0r6OkIH+HixYvyxiE5tLS06N+/v3xJc0ZGBnfu3OHq1atcuXIFDw8PBg0alOfzZXp6Ordv36Zdu3a0b9+eSpUqfVIQ/P79+8ycOZPTp09jZWVFYmIiISEh6OjosGnTJqXPte7u7rRs2ZJevXphbm7O6tWriYuLw8DAAFtbW2JjY7lx44Z8/7CwMEqWLMnx48dxcHBAKpViZWWFubk5Z8+eVTqvd7tILlq0iClTpohut
YIgCIJQgMRfYeGzUpYZ9q6PfeGXkJDAnTt3uHHjBm/evAGyP43dvn07NWvWJCYmBk9PT2bNmiWCYcJX86HH2sc+Fl+/fk3FihX5888/KVeuHM+ePftHwTDIvwvsh8ZLliyZZ4aHlZWVvKbf1+Dp6cmBAwcIDg4mMTGR0NBQXFxcWLVqFdbW1ty8efOrBcPc3d0ZOHAg/fr1Y/78+V/lmkK2nML6gYGBJCQkFPBshI/RqlUrBg4cSI0aNbCwsKBp06aMGTNGob6furo6tra2/Prrr5w9e5bo6GhsbW3zPGdcXBxxcXFMnz6dypUrU758ecaOHcvp06c/quGCtbU1p06dwt3dnUKFChESEoKFhQVSqZQBAwZgbW3Ny5cvFY5p3LgxW7duZdeuXejp6RESEsLo0aOJi4vjwoUL3Lx5UyHw936HyT/++IOnT5+yePFipXM6e/YsNWrUIDg4mKtXrzJt2jQRDBMEQRCEAiYiCcJnZWRkRKFChYiLi1M6bm5u/sFzeHp64ubmphBcK1++PMeOHePo0aMMHToUZ2dn9PT0Ptu8BeFjVK5cOc8AU/HixRWaPWRmZvLw4UMCAgKQyWSYm5tjbW3NiRMn6Nu3L+np6QwePJitW7cqbUzxsXR1df/VuIODA0WLFsXb21ueDVG7dm3q16//1ZZMJiYm4ubmpnSsVq1aDB48+KOWjn4Ofn5+ODg40KBBA7Zv3/7NLBv9UZiYmFCsWDEiIiK4d+8ejRs3LugpCR+hbNmylC1b9qP3V1dXp0ePHuzZsyfXcu/k5GQOHTpEREQEOjo6VKlSBQ0NDY4cOcL69evR1NSkadOmtG/fnnbt2lGxYsU8r9OoUSM8PDw4deoUM2bMID09neLFi/PgwQPKli3L+PHjWbFihTwwNXDgQJ48ecK0adOoUKEC69atw9fXFz8/P968eUPDhg3p3bs3M2bM4PHjx2hpaWFubk5ycjJz5syhT58+uRoRSaVSZs+ejZOTE+3bt+fPP/8UhfMFQRAE4RshlkwKn93Dhw85evRoru26urr8/PPP6Ovr53lsQEAAe/fuVTp27949BgwYQJcuXT7bXAXhU6SmpvLHH38QGRmpsF1NTY3+/fvLlxemp6eze/fuXBkIaWlpODs7k5WVxYEDB3BwcPjXc8rMzGTNmjXEx8crHR84cOBHv1HNysoqkIwFHx8fTpw4ked4o0aNaN68+Refx5s3b7C1tUVPTw8PDw8MDQ2/+DWF3Dp27MiZM2dYunQpkydPLujpCF9QZmYmjx494smTJ2RlZWFubk7NmjWRSCT4+Phw5coVrly5wrVr14iPj0dLS4vSpUuTmZlJSEgImZmZVKhQQb60skmTJmhra+d5rb179zJ79mxCQkLQ1tYmJSUFY2NjDh48SLNmzYDs58GePXty5swZrl27RmxsLC1atKBKlSqoqqoSGxvLy5cvKVu2LJqamvj7++Pk5MRvv/2Gv7+/wgd/YomkIAiCIHzbREBM+CJ8fX25evUqERERSCQSLCwsaNWqFUZGRvket2fPnjzrxqipqTFt2jR5hypBKAipqalcv34dPz8/MjIyKFOmDA0aNJB3PgVwdXXl+vXrSo8PCAhg1apVFC1a9LPNKSQkhL1795Kenq6w/VvqEpmfO3fucOrUqTzH86uD9rkkJibSpEkT3rx5g5eX11etnSYomjNnDgsXLqRr167s37+/oKcjfAMyMzMVAmTu7u7Ex8ejoaGBsbExSUlJxMXFoaWlRfPmzeXZY8qy0tPS0ti8eTMLFiwgKioKmUyGTCajVatWHDlyBH19fZKTk2natCmhoaHcvHmTrl27oqqqipmZGbVr1yY5OZmkpCQiIyNJT0/n9OnT9OvXT6GGak4XSW1tbfbv3y8K5wuCIAjCN0gExIQvKjU1FTU1tY+urbR69WpiY2PzHB83btwHg2qCUNBWrFhBYmKi0jF1dXWmT5/+2bME4uPjuXPnDuHh4ejo6GBtbU2ZM
mU+6zU+N5lMxvnz55kzZ06+XRz79euXZ6fMz0EqleLg4IC7uzvXrl3LteRJ+LpcXFxwcHCgTJkyBAcHF/R0hG9JQADMno3MwwPZ27fEGhhw1siIKa9f8yY+Hn1VVX7W0KB1SgpVAX2JhFhjYxJ69cLcyQlNHR35qRKfPiWoWze0Hj2ihExGJhAgkRDTpw+td+3i9Zs31KtXj/Lly9O6dWuFDxz6/fUX5YOCuFWnDmd/+onk5GRCQkL4pVs3aj98SNjWrei9fo2Kujo6deqgPncuvP/hhJsb7NkDHh7w6hUULw7Nm8P8+fB+B+6sLNiyBTZtgmfPQFcXbGxg9myoX//L3d+CIAiC8B8n8raFL0pLS+uTit6/W4PpfSoqKh+shyQI34L8ij5nZGSQkZHx2a9pYGBAs2bN6NWrF/b29t90MCwzM5NDhw5RrVo12rVrx82bN/H29la6r7m5+UfVHvynZDIZY8aM4fz58xw+fFgEw74BtWrVArIzH2NiYgp4NsLnFB0dzZUrVzh16hTe3t6kpaV9/MEvX0LdunDjBpKxY1FZswajn36i75MnhDVpwt27d1k3aRJLUlJQU1NjJTAV8H77Fsu1azmkr89PP/3Exo0bCQ4ORi81lepGRpScMIGzzZvzm0RCmExG6z172GhkRFxcHH/88QcNGjRQCIZV9vWl9HvL4XV0dLC0tMRr+nTS5s3D4/Vrbjs4oL90KeqpqdCqFezYoXh7fv0VrlyBzp1hzRro1QsOHoSaNeHvZkJyU6fCqFFQrRqsXAmTJ8PTp9CkCdy69Un/B4IgCIIg/J/IEBO+KXnVH4PsrlGdOnX6uhMShH9g69athIWFKR0rXLgw48aN+yGLtefUVnNyciIgIAAAiUSCiooKhQoVYvPmzQQHB5OUlISGhgY1a9akefPmaGhofLE5OTk54ejoyPbt2xkyZMgXu47w8WQyGUWLFiUqKoqLFy9+F8t+hQ/z9vbmzJkzvPuyU19fn379+lGsWLEPn2DRIpg5Ex49Aiur/28fOBD++guioyEzE8LDyaxcmYcPH8qXWHY9c4b+GRlUBHKKMpiZmdG5c2c6duxIo0aNCAsL47fffqP7rl00A8oaGjJ63DiFpieqGRmMWb8en5o1aXb5sjxDLEfRiAgOX7vG5YcPsbW1ZdasWbRv0QJJzZqQmJgd1Mvh7g4NG8K72cLu7tlBrpkzYcGC7G1SKRgYwE8/waFD/9/3+XMwN4fx42H16g/ff4IgCIIg5CIyxIRvStWqValXr16u7aVKlaJNmzYFMCNB+HT181nC8jW7N34rkpKSWL16NeXLl2fo0KG8evVKfh/o6upStGhRPD096datG5MnT2batGn8+uuvtG3b9osGw/bu3YujoyO//fabCIZ9QyQSCbVr10ZVVTXPzEHh+xIREZErGAaQkJDA4cOHc21XKqdxyDv1GoHs5YUqKqChAcbGYGWFqqoqNWrUYOLEiRw/fpy+fweSVgwbxk8//YSenh4vXrxg9erVtGzZEj09Pfr27UvdunWp4eCADtC0fv1cHYAbXL+ORCbDM4/n+MhixegwaBBnzpxBRUWFDh06UKt+fQIqVsxeFpmQ8P+dGzdWDIblbDMyAj+//2/LyICUlNy3u1ix7OPzaCIgCIIgCMKHffxaNkH4CiQSCW3btsXa2lpetLxs2bJUrFhRdGYSvhtWVlYkJCRw6dIl+fJIVVVVGjVqJF8O9iOIiYlh3bp18tqAxYsXB7LvC3V1dQoVKoSGhgZXrlyhQoUKQPZzQF5d4j6ny5cvM2jQIAYMGMCcOXO++PWET1OrVi0uX74sAmLfsDdv3nDz5k0iIyPR09OjZs2aVKpUSWnA38fHJ8+gV2RkJDdu3MDMzAxtbW10dHTQ1NTMfZ6mTWHJEhg6FObOhSJFwNMTNm7MzpLKp6SCSkQEAPZDhmBvZ0dWVhaPHj3i8uXLnDt2jEc3bxLq6cltT08GAXc0NKjy3nO1QWwsD
T08OOHggDSf5j4JCQmUL18eDw8Prly5wvz587l18iQlJRJcXFzo3qtXrkCbXGJi9pex8f+3aWtDvXqwcyfY2UGjRhAbm11rrHBhGDEiz7kIgiAIgpA/ERATvkklSpSgxPtFZQXhO2Jra0uNGjUICQlBJpNhZmaGzjsFnf/LXr9+zapVq9i4cSNSqZSqVaty//59UlJS0NDQwNTUlKSkJNTU1Lh8+TLlypX7qvN7/PgxnTt3pkmTJmzduvWHy9j7HtjY2JCens6NGzcKeiqCEr6+vrkyu548eYKdnR2tW7fOtX9eTUZyDB06FL93sqIkEgk6OjryAFnO10gzM/qfOoWWi4t833O1a3NZQwOduXOVHqOrro7dokWolixJsIEBOi9foq2tTYUKFahatSoTUlLg6lX5+W4ZGDC/XDlqvxe0an3hAq9LlOBxtWofvH98fHyoVKkSzZo1o1np0mRaWXHZ2Jje/frx29y5zJgxg759++bumu3sDOnp0LOn4vbdu7O39ev3/23m5nD9evZ3QRAEQRD+EREQEwRB+EK0tLSwsLAo6Gl8NUFBQSxbtowdO3agqalJ69atuXXrFvfv36dChQr4+fnRq1cvPDw80NTU5NKlS1+9+H9YWBjt27fHzMyMw4cPf9ElmcI/l5NJGRoaSmRkJEWLFi3gGQk5MjIyOHnypNKMLy8vL6ysrChZsqTCduN3M56UWL58OSoqKiQnJ8u/UlJScv076+FD/NPSuFasGG9lMmqHh/OTtzc3g4PZrK5OSkoKKSkpCsX6NwMtgPbA2apVc127ooYGlbW1Ka6mRjuplGLp6Zi/94Fc2efPqeLry7bhwz/qPpI3g0hOhu7dUdXVpeXt29wOC2PBggUMHjyYuXPnMn36dAYNGoSmpmZ2/bC5c6FHj+xuk+/S18+um2ZnBy1aZBfdd3KCTp3g2jXFjDJBEARBED6aKKovCIIg/CuPHj3CycmJ/fv3Y2RkRL9+/Xjw4AFubm7Y2dnx8uVL4uPjmT9/PsuWLUNbW5tLly5RqlSprzrPhIQEmjRpQkREBDdu3Pjq1xc+nkwmo3DhwsTFxXHmzBnatWtX0FMS/vbkyRP279+f53hqaioJCQlIpVL5l0wmo0yZMkqXCkZFRXH37l2F/ZV9/ZSYyKq4OGrr6xOSmSnfvi0rix6AGRD93rmnAMuAWcDCj7x9m4FOWlpsnTwZqbo6ksxMft68mdclSnCic2f5fr/PmZOrqH6OokWLMvrnn7M7SJ4/D2fPKgS5Hjx4wMKFCzl06BAlS5Zk8cCB9N24EYmZWXZgTF///yeTSrM7TzZtCmvX/n97QEB2kOyXX7KXkgqCIAiC8MlEhpggCILwj9y4cYPFixfj4uJC6dKlcXJyIiIigtWrV1OiRAkGDhzInj17qFWrFjt27GDAgAEUKlQINzc3TE1Nv+pcpVIpPXr04NmzZ3h4eIhg2Dcup7C+u7s73t7eIiD2DXk3+0qZ0NBQHjx4gJqamsJXQkIClStXzs6G+ltiYiKxsbFUqFABdXX1XMe8+zV4xw6iDQ0ZOm6cwvbSDx6gu2EDB6dP522NGvLtZa9cofrq1bz66SdaTZlCuzzOm56ezrNnz3jy5An+/v488PRkREAAZUJCCKxQAev79zF++5ZTHTpQKCfz628a6ekUiokhSVcX6TvZptra2jB8OJw6BXv25Mr4ql69OgcOHGDOnDlsnjWLxgsX8kJVlTMdO9JfIkHv3Z3d3bM7a65cqXhHV6wIlpbZyyYFQRAEQfhHREBMEARB+GgymQxXV1cWL17M5cuXqVy5Mn/88Qfa2tpMmzaNiIgIxo4dy4MHD/jzzz/59ddf6dWrF23atKFo0aK4ublh8n63tK8w51GjRuHq6sqZM2eoXr36V72+8M/Y2Nhw/fp1bt++XdBTEd5RunTpfMcnTZpEjRo1lI5lZmYSFBREUlISJiYmn1YrdO9eKFyYyZMnK24/eBA2bKBFkybQtm32t
hMnsrOpunSh1MGDlFJRQSaTERoayv379/Hx8eH+/fvcv3+fgIAAZDIZqqqqmJub0zolBQDN1FQACsXFoZqVxdA//sg1pRr371Pj/n329+zJE0tL+XaLbduyC/47O0Pv3nneJMtixXD29UVapAi/N2/OMicnZm/YwC+//MLYsWMpVKgQhIfn3Hm5T5CRkZ1BJgiCIAjCPyICYoIgCMIHZWVlcfz4cRYtWsSdO3eoVasWR44cwdLSkgkTJnDx4kU6dOhA9+7dmTZtGjKZjPPnz2NiYkLLli0xNTXF1dW1QGpBLVq0iG3btrFjxw5atWr11a8v/DM2NjakpqZy69atgp6K8I7ChQtjbW3N/fv3c40ZGxtTVUmdrhyqqqpUrFjxn124UiW4cAGePs3+Oce+faCiAjmBbnd3ZL16kWRjw/F27bg3dao8ABYVFQWAoaEhjS0tadu2LX369OH58+dcuHCBgIAAlgNZwH1VVdSBR1Wr8ubvDrnv6nXgAE8rVuSujQ2h72Sc2l67Rn1PTxYBV8+cYZaNDQ0bNszdvCMpCdq3h9BQ1C5fZmGtWvz84gVLly6VLy8fP348k5o0wRBg//7/B/wA7t6FJ09El0lBEARB+BdEDTFBEAQhTxkZGezZs4clS5bg7+9P06ZNcXR0xM7OjoULF7Jy5UpKlSrFihUruH37Nk5OTrRs2ZK//vqLsLAwWrVqRdmyZblw4QJFihT56vPfvXs3/fv3Z86cOfz+++9f/frCPxcQEEClvwMfoaGhX32ZrZA3qVSKq6srd+7cQfp3hlKlSpXo0KED+u/Wv/qc3N2zlx4WKQJjx2Z/P3UKzp4lrH17DrZqxUsPD+YcO4ZqVhZTgHigWNGilClTBjMzMwo3aUJZe3vevn1L6siR6D94gEtGBi8AI6AbUAc4ZW5O4sKFPHnyJM/pKKshVtnPj54HDvAUmPfOvoaFCtGmbVvsZs/G2Moqe2OnTtmZbEOGQLNmCueOTk9n4ePHbNq0CRUVFe4YG1MpODi7Jlnr1vD6dXYGXHo63LkDP1DzFkEQBEH4nERATBAEQcglOTmZ7du3s3z5cl68eEHHjh1xdHTE1taWw4cPM2nSJN6+fYujoyM9evRgyJAh3Lp1i4ULFzJ16lTu3LlD69atqVSpEufPn8fQ0PCr34ZLly7Rtm1b+vXrx/bt23NnaAjftKysLAoVKkRiYiInTpzA3t6+oKckvCctLY3Y2Fh0dXXR09P78AH/QlZWFqHHjqG2cCF6AQFoJSXxQlWV7VIpSwF1LS0GlinDpnyCWBfs7Bj68iWvXr2itUTCWJmMWoAxkAq8LVEC/V9+IalbNxw6dcLCwgKrnADWe5QFxNreuEG9c+fyvH5ziQT9jh0ZM2YMrUaMQBISonzHMmUgOJjIyEhWrlzJtrVrGZeWxnADA0ySk1HR1IRGjWD+fMhjeaogCIIgCB8mAmKCIAiCXGxsLBs2bMDZ2ZmoqCh69+7Nr7/+SrVq1fDz82PcuHG4ubnh4ODAqlWruHv3LkOHDsXQ0JB9+/ZhZ2eHl5cXbdu2xcrKirNnz2bXwfnKHj16RIMGDahXrx6nT59GXV39q89B+PeaNGnCzZs3mTp1KvPnzy/o6QhfSXJyMg8fPlSo9/XgwQMSExMBKF68ONbW1tSoUUP+vWLFiqipqSGTyQgPDycmJoaXL19y9uxZDh8+zKtXr1BXVycjIwN1dXWKFi1KWFgYJUqUYPz48fz8888ULlyYK1eu0K1bNwwMDDh27BjBwcH4+Ph8cM7FihWjb9++GBgYyLc9fvyYyZMnc/78+Vz7lyxZkpEjRzJ48GBKliyZ77mjo6NZs2YNq1evJjk5mSFDhjB9+nTKlCnzaXesIAiCIAgKREBMEARBIO1h+DMAAQAASURBVDw8HGdnZzZs2EBqaipDhgxh6tSpmJubk5CQwPz581m1ahVlypRhzZo1NGvWjEmTJrFp0ya6devG1q1bMTQ05Nq1a7Rv3
56aNWty+vTpL7d8Kh+hoaHY2tpSpEgR3N3dFd6gCt+XX375hS1bttC4cWPOnj1b0NMR3pGamsqrV69QUVGhdOnS/yjoLJPJeP36tULgy8fHh4CAALKyslBVVaVy5coKwS9ra+s8G3NERUXx119/ER8fL98WHBzM8ePHiY2NpXLlyiQnJ/PixQuqVavGlClT6NWrFxoaGshkMjZs2MCECRNo0qQJBw8epEiRIshkMp4+fcqtW7cICgrKdU1jY2Nq166NjY1NnveBt7c3U6dO5cqVK0gkEnJeekskEiQSCe3atWPEiBG0b98eNbW8y/vGx8ezfv16Vq5cSWxsLP3792fGjBlUqFDhU+52QRAEQRD+JgJigiAIP7Dg4GCWL1/O9u3bUVNTY9SoUfzyyy+UKFECmUzGgQMHmDx5MjExMcyYMYMpU6YQGBhIr169ePbsGatXr2b48OFIJBIuX75Mhw4dqFevHidPnkRXV/er3574+HgaN25MVFQUN27c+GDmhfBt27VrFwMGDMDIyIi3b9+KZa/fAJlMxvXr13F3dycjIwMALS0t2rRpk2d3SciuR+jv758r+PX27VsADAwMcmV9WVlZoaWlle98pFIpV69e5fDhw2hqalK4cOFc+yQkJLB3715ev35N69atmTJlCi1btpQ/ntLS0hg7dizbtm1jwoQJLF++XGlgKioqilevXpGamoq6ujrGxsaULl36ox+X7u7u/Prrr9y4cQNVVVUy/+4cmZO5Vrx4cYYMGcLQoUMxNzfP8zxJSUls3ryZZcuWERERQe/evZkxYwZVqlT5qHkIgiAIgpBNBMQEQRB+QL6+vjg5ObF3714MDQ2ZMGECY8eOlb+ZfPz4MePGjePy5ct07tyZVatWYWZmxvbt2xk/fjzlypXjwIED8o5yFy9exMHBgYYNG3L8+HF0dHS++m3KyMigQ4cO3LhxAw8PD6pVq/bV5yB8Xr6+vvIaTsHBwWKJ2Dfg7t27nDx5UulYv379KF++PLGxsbkCX48fPyY9PR2AsmXLKgS+rK2tKVu27EcHljIyMrh06RKHDx/m2LFjREVFUa9ePdq1a5fnMW/evGH06NG5nhfCw8Pp0qUL3t7ebNq0icGDB3/kPfHP5HTgdXR0xMfHB01NTdLS0gBQU1NDIpGQkZFBy5YtGTZsGJ06dUJTU1PpuVJTU9m+fTtLlizh1atXdO3alZkzZ+YbmBQEQRAE4f9EQEwQBOEHcuvWLRYvXszx48cpWbIkU6ZMYfjw4fJsrvj4eObOncuaNWsoV64ca9eupU2bNsTFxTFixAgOHjzI8OHDcXZ2lge9zp49S+fOnWnRogVHjhz5YEbHlyCTyRg2bBi7du3i7NmztGjR4qvPQfj8MjMz0dPTIzU1lUOHDtGtW7eCntIPTSaTsW7dOqKjo5WOx8fHc+TIEUL+LhavqalJ1apVFQJf1atXV2iyER0dTUREBLq6upQqVSrPoFhaWhqurq4cPnyYEydOEBMTg6mpKYULFyYwMJD69evTuHHjPOfeunVr7OzsFLZ5e3vTuXNnpFIpx44dw9bW9hPvkX9OJpNx9OhRZs2ahb+/P/r6+iQkJMgzx4yMjIiOjsbY2JgBAwYwbNgwLC0tlZ4rPT2dv/76i8WLFxMUFETHjh2ZNWsWdevW/Wq3RxAEQRC+RyIgJgiC8B8nk8m4fPkyixYtws3NjYoVKzJ9+nT69euHhoaGfJ+9e/cydepU4uLimDVrFpMmTUJTU5ObN2/Su3dvoqKi2Lp1Kz169JCf++TJk3Tr1o22bdty8ODBPDMZPrf4+HhCQ0PR1NSkTJkyLFy4kN9//52//vqL/v37f5U5CF9H/fr18fHxYfz48Tg5ORX0dH5o6enpLF68OM/xjIwM0tPT5QEwCwuLPGtipaWlceLECfz8/OTbjI2N6dq1K8WLFwcgJSWFCxcucPjwYVxcXIiPj8fc3BxTU1OePXvGmzdvqFixIgMHDsTGxoZbt
27lObfOnTtTvXp1+b/3/I+9O4+rMX8fP/467UqSfUlEZYuI7PueJTvJElKyTGU3ZN/3whBJZY0x9n3Lkp2EUiqlUJSKtG/n94df5ztN58R8ZgYz3s/HYx4zc7/v9VTnnPu6r/d17d3LuHHjaNiwIYcPH/5m06vz8vLYu3cvCxYs4MWLF5QvX56EhATU1NTIzs6mfPnyZGRkkJqaSps2bRg3bhyDBw+Wm4Wbm5vL/v37WbZsGc+ePaNbt264uLjQtm3bb3BlgiAIgvD9EwExQRCE/6j8/HyOHz/OihUruHv3Lo0aNeLnn39m4MCBKCsry9Z78uQJkydP5tq1awwaNIh169ahr69Pfn4+69atY86cOZiZmeHr64uBgYFsu8OHDzN06FAsLS3Zv3+/LLj2T8rLy+PMmTMEBATIClMrKyvj4+ODjY0NLi4u//g5CF/X5MmT8fHxoXnz5ly8ePFbn84PLT8/n1WrVsmmPv5RuXLlmDRp0hft68CBA4SGhhZZrqmpib6+PkePHuXkyZOkpqZSr149atWqxatXr3j48CGlSpXCysoKGxsbWrZsiUQi4ePHj7i5ucnqcv2euro6U6ZMQV1dnby8PH7++WfWrFnDqFGj2LZt2zfJav2j7Oxsdu7cyZIlS3j79i16enpER0ejoaFBZmYmpUqVoly5ckRGRqKjo8Pw4cMZN24cjRs3LrKvvLw8fvvtN5YuXcqTJ09o37498+bNo1OnTqIOnyAIgiD8jtK3PgFBEATh75WTk8Pu3btp0KAB/fv3R11dXRZEGjJkiCwY9uHDB6ZMmULjxo15+/Yt58+f59dff0VfX5+3b9/Ss2dPZs6cydSpU/H39y8UDDtw4ABDhgxh4MCB+Pr6fpVgGMDFixd58OABv3+Wk5eXx/Dhw7G3t/8q5yB8XWZmZqSlpXHv3j3EM7xvS0lJqdjafKampl+0n8TERLnBMID09HTc3NwIDQ1l4MCB9OrVi8jISE6dOkWFChXYt28fb968Ydu2bbRq1UoW4NHW1sbS0rJIwEdZWVn2PpicnEzv3r1Zt24dGzZswNvb+7sIhgGoqanh4OBAREQEq1atIjU1FVVVVQwMDFBWViYrK4uYmBhUVVWpXr06v/76K2ZmZjRt2pRt27YV6qyprKzMkCFDCAwM5MiRI6SmptKlSxdatWrF6dOnxd+RIAiCIPx/IkNMEAThPyIjIwMvLy/WrFnDixcv6NmzJz///DNt2rQptJ5UKmXPnj3MmDGD1NRU5s+fj7OzsyyodfHiRUaMGIFUKmXXrl1079690PZ79+5l1KhRWFtb4+XlpXBK1N8tKyuLtWvXkpubK3e8SZMm9O7d+6uci/D1PHr0SFYkPCwsDCMjo297Qj+4jIwMdu/eTVxcXKHltWrVwsrK6oveD4KDgzl06JDC8fT0dPbu3cvr16+pU6cOo0ePZsSIEV80rfHdu3cEBATw4cMHypQpg5mZGbq6uoSEhNC3b1/evXvHgQMH6Nq16+cv9htKSUnB1dWVdevWkZOTg4mJCU+ePCE/Px81NTVSU1MxMTFBQ0ODgIAANDQ0GDp0KHZ2drRo0aJQYFAqlXL27FmWLFnCrVu3MDMzw8XFhb59+6KkJJ6NC4IgCD8uERATBEH4l0tJSWHr1q1s2LCBhIQEBg8ezOzZs+V2Gnv06BGTJ0/G39+fIUOGsG7dOvT09IBPmWULFixg5cqVdOnShV27dslq+RTw8fFhzJgx2NjYsGPHjkJTL/9pcXFxbN++XeG4np4etra2X+18hK8jJyeHkiVLkp2dzb59+xg2bNi3PqUfXm5uLkFBQURGRqKkpISxsTF16tT54uDKixcv8PHxUTj+4MED9PT0GD16NObm5n95mt/JkyextramWrVqHDt2DENDw7+0v68pMTGR1atXs2nTJtTU1DAzMyMgIIDU1FTKlClDQkIC+vr6mJiYEBQURExMDPXr12fcuHGMHDmSsmXLyvZVUE9y6dKl+
Pn5YWJiwty5cxk8ePBXfS8XBEEQhO+FeCwkCILwL5WQkICLiwv6+vrMmzcPS0tLnj17hq+vb5Fg2Pv373F0dMTMzIzExEQuXrzIgQMHZMGwFy9e0L59e1avXs2KFSs4e/ZskWCYh4cHY8aMYdy4cXh6en71G6iCTpiKlCxZ8iudifA1qaqq0rBhQ7S0tLh///63Ph0BUFFRoVGjRgwYMIB+/fpRr169P5VppK+vX6jT5B9t3ryZLVu20KxZs78UDJNKpSxfvhxLS0s6derE7du3/1XBMICyZcuyatUqnj9/zsiRI/H390dNTY3evXujoqKCRCJBKpVy7tw5kpKS6NevH/r6+sycOZMqVapgbW3N5cuXyc/PRyKR0KlTJy5fvoy/vz96enoMGzaMevXq4ePjQ05Ozre+XEEQBEH4qkRATBAE4V8mJiYGJycnqlevjqurK7a2tkRFRbF9+/YiN3v5+fn4+PhQu3ZtvLy8WLVqFYGBgXTu3Fm2zm+//UajRo2IjY3l+vXrzJo1q8jN7ZYtW7C3t2fixIm4u7t/k2k27969KzJN6/e+tH6R8O9jZmaGioqKCIj9BwQHBzN79mx27txJRkZGkfGuXbuir6//l4+TlpaGlZUVc+fOxcXFhcOHD6Otrf2X9/utVK5cmU2bNhEWFkbv3r05ceIEKioqjBw5Eg0NDfLy8ihXrhyXLl3izJkzdO7cmTFjxhAQEEDnzp0xNjZm5cqVvHnzBoDWrVtz5swZ7t69S926dRk9ejS1a9dm+/btZGVlfeOrFQRBEISvQ0yZFARB+JcIDQ1l1apV7NmzB21tbRwdHfnpp58KTYn5vYcPHzJ58mRu3rzJsGHDWLNmTaEaPBkZGUydOhV3d3cGDRqEh4eH3KwNNzc3nJ2dcXZ2Zv369d+kS9n+/fuxtbVFXV0dGxsbdHV1C403a9aMHj16iA5q/1Hbtm1j4sSJaGhokJKSIqZ3fUcSEhJ4/Pgx6enpVK5cmQYNGqCurl5oncTERHx9ffH29ub+/fuULVsWa2trhg0bRm5uLm/fvqVkyZKYmppSuXLlv3xO0dHR9OvXj7CwMHx8fBg0aNBf3uf3JjQ0lAULFnDw4EEMDQ3p1asX169fJyAggOrVq5Ofn8/Lly9p0KABvXr1IiYmhsOHD5OTk0OfPn2ws7Oje/fusr+lR48esWzZMg4dOkTVqlWZOXMm48aNo0SJEt/4SgVBEAThnyMCYoIgCN+5Bw8esGLFCg4fPkylSpWYNm0a9vb2CrMdkpOTcXFxwd3dnbp167J582Y6dOhQaJ2nT58ydOhQIiIicHNzw87OTm4wae3atcyYMYMZM2awatWqrx5wSk1NxdHRES8vL9mxVVVVGTZsGH379kVNTQ0TExOqV6/+Vc9L+Lru37+Pubk58CnDqF69et/4jASAW7ducf78+ULLSpUqxciRI9HR0eHs2bP4+Phw/PhxpFIpPXv2ZPTo0fTq1esf60x79epVBg0aRMmSJTl27BgNGzb8R47zvQgMDMTFxYVTp07RoEEDBg0axJUrV/Dz80NPT48yZcrw+PFjypcvL3uYcPDgQR49ekS1atUYO3YsY8eOlWXlhYSEsHz5cvbt20f58uWZPn06Dg4OYkq6IAiC8J8kAmKCIAjfIalUyrVr11i+fDnnz5+nVq1azJw5ExsbmyLZFwXy8/Px9vZm1qxZZGVlsWjRIiZPnoyqqmqh/Xp6euLo6IiBgQEHDhzAxMRE7v6WL1/O3LlzmTt3LkuWLPnqwbDAwECsrKyIioqS1bbR0NAgMzOT6OhoqlWr9lXPR/h2MjMz0dbWJjc3Fx8fH0aNGvWtT+mHFxsbi4eHh9yx3Nxctm/fTnx8PI0aNcLGxgZra2sqVKjwj52PVCrF3d0dR0dH2rZty8GDBylXrtw/drzvzc2bN5k7dy5XrlyhefPmjBgxgsuXL3PkyBEqVqyIoaEhDx8+JCcnh6FDh9K1a1f8/f3Zv38/a
WlpdO/eHTs7O/r06YOqqioRERGsXLkSHx8fdHR0mDp1KpMmTUJHR+dbX6ogCIIg/G1EDTFBEITviFQq5cSJE7Ru3ZoOHTrw5s0b9u/fT2hoKPb29gqDYQ8ePKBVq1bY2trSo0cPnj17xpQpUwoFwz58+MCwYcOws7NjxIgR3Lt3T24wTCqVsmjRIubOncuiRYtYunTpVw2GSaVSNm7cSPPmzcnKyiI3NxeAHj16kJGRgY2NjQiG/WA0NDSoX78+pUqVEnXEvhMPHz5UOFZQ2yowMJCHDx/i7Oz8jwbDsrOzcXBwYOLEiUyYMIFz5879UMEwgFatWnH58mUuXLiAVCrlp59+4v379+zbtw8LCwvu3LmDmpoaHTp04MqVK9jY2BASEsLWrVtxd3cnOTmZgQMHoqenx6xZs5BKpezYsYOIiAiGDh3K4sWLqVGjBgsWLCApKelbX64gCIIg/C1EQEwQBOErycnJUdjFKzc3l3379mFqaoqlpSUSiYSTJ08SGBjI0KFDiYuL4+rVq1y7dq1QYfmkpCQmTJiAubk56enpXLt2jd27dxepw3P37l0aN27MmTNnOHDgANu3b0dTU7PIeUilUubNm8fChQtZtmwZ8+fP/3tfhM949+4dlpaWODk50blzZ6Kjo8nPz2fp0qWcO3cOFRUVVqxY8VXPSfg+mJmZoayszL179771qQh8ms5cnPHjx3+VRhdv376lU6dOeHl5sWPHDjZu3FjoQcCPRCKR0KVLF27fvs2xY8dISEjA2tqad+/eceLECWxsbLhx4wZJSUn06tWL7OxsRo4cyfLlyxk0aBD+/v5YWVnh4eGBsbExHTt25MaNG6xbt47IyEjGjBnDmjVrqF69OrNnzyY+Pv5bX7IgCIIg/CViyqQgCMI/LCYmhkuXLhETEwOAgYEBnTt3pmrVqmRmZuLj48Pq1auJjIyke/fuzJkzh7Zt2yKRSMjNzeXw4cOEhIQU2qepqSnx8fHMmTOHnJwcFi9ezKRJk1BRUSm0Xn5+PuvWrWPOnDmYmZnh6+uLgYGB3POUSqXMmjWLNWvWsGbNGqZPn/7PvCAK+Pn5MWLECLKyshg9ejTr16+XZYsdOHCAmzdvygr7Cz+ezZs34+zsjIqKCh8/fvxhgx7fi8uXL3P9+nWF405OTnKbdPyvUlJSiIyMRCqVUqtWLUqVKkVAQAD9+vUjOzubw4cP06pVq7/teP8F+fn5HDhwgPnz58syvZydnTl79iwbN27k48eP9OjRAyUlJc6ePYuqqio2NjbY29sTFBTEjh07uHLlCrq6uowcORI7OzsqVKjAhg0b2Lx5M3l5eYwfP57p06cXatgiCIIgCP8WIiAmCILwD4qOjmbXrl3k5+cXWq6iooKKigqurq68ffuWQYMGMXv2bMzMzAqtd/HiRW7cuCF33+fPn8fQ0JBVq1ZRqVKlIuPx8fGMGjWKc+fOMXPmTJYuXaowiCCVSpk6dSqurq64ubnh6Oj4P17xn5ebm8uiRYtYtmwZ7du3p3fv3rJg3JIlS6hZsybDhw9HQ0ODly9f/nBToYRPbt26JQt4BAYGfpXsI0GxDx8+8Msvv8jNeq1Xrx6DBw/+W44jlUq5cuUK169fp+Arq0QiQVdXl5kzZ2JiYsKRI0fQ09P7W473X5STk4OPjw+LFy/m9evXjB49mmnTpnHu3DnWrVvH69ev6dGjB1WrVuXEiRPEx8fTs2dPnJ2dqV69Op6ennh7exMfH0/z5s2xs7Oja9eu7Ny5Ezc3N9LT07G1tWXWrFmiwYkgCILwryKmTAqCIPyDLl26VCQYBp+CQEFBQfTs2ZOQkBAOHjxYJBiWn5/PgwcPFO7b0tISHx8fucGwixcvYmpqysOHDzl79iyrVq1SGAzLz89n8uTJuLq6smXLlq8aDIuOjqZ9+/asWLGCJUuWMGDAAFkwbNq0afz0009MmTIFJSUlZsyYIYJhP7CGDRuipKSERCIRdcS+Azo6OgwbN
qxI90FjY2MsLS3/tuMEBgZy7do1fv/8ViqVkpSUxNixY7l27ZoIhn2Gqqoq48aNIywsjPXr13Py5EkaN25MVFQUN27cwNPTk8jISDw9Palbty7Tpk3j9evXdOvWjf79+1OrVi3CwsL47bff0NXVxc7Ojvr16xMXF8fhw4dZsGABv/76K4aGhtja2hIREfGtL1kQBEEQvogIiAmCIPxDsrKyePnypcJxIyMjPDw8qF27ttzxjIwMMjMzFW6fmZlZJNiWk5PDnDlz6NatGyYmJjx69Iju3bsr3Ed+fj4ODg5s3boVDw8PJkyY8Jmr+vv89ttvNGrUiFevXnHt2jU0NDRwdHREIpFga2vLmjVrWLRoEcnJyZQsWZKpU6d+tXMTvj9aWlrUqVOH0qVLizpi3wkDAwOcnZ2xtramX79+TJgwgWHDhils/vG/uHPnjsKxevXqUaJEib/tWP91GhoaODk58fz5cxYsWMDu3bupV68eYWFh+Pv7c+jQIVJTU1m3bh3KysosXLgQIyMjHBwcqFmzJvfv38fDw4OoqCimTZvG6dOn6dSpEwcPHuTnn39mwYIFnDp1itq1azNixIgiU/0FQRAE4XsjAmKCIAj/kL/amVFDQwM1NTWF49ra2igp/d/b+IsXL2jfvj2rV69m+fLlnDt3Tm72WIG8vDxsbW3ZsWMHXl5ejBs37i+d75dKT09n/PjxDBo0iM6dOxMYGMjly5eZPn06ysrKDBo0iG3bthEUFMTGjRvJz89n5syZf2s9IuHfqaCwvsgQ+34oKytjZGSEqanpP9JJMiEhQeHYu3fvEJU//rySJUsyZ84coqKimDJlCps3b8bQ0JCnT59y+fJlLl68iK6uLgsXLiQ4OJilS5cyfPhwNm/ejIGBAbNnz8bCwoIXL15w6tQpatasycyZM1m2bBmdO3fmp59+4sqVK9SvX5/Bgwfz6NGjb33JgiAIgiCXCIgJgiD8Q9TU1NDX11c4XqtWrUIBrT9SVlamcePGCsebNGki+++CbKvY2FiuX7/O7NmzC+1bKpWSlpZGdnY28GnKpo2NDbt27WLPnj3Y2Nj8mUv7nwUFBdGsWTN27drFtm3bOHjwIGvXrmXevHmoqanRtWtX9uzZg5KSEhMnTkRbW5vSpUt/1WmcwvfLzMyMDx8+8OjRI7Kysr716QhfgY6OjsKxUqVK/eUHDz+y0qVLs3TpUiIjIxk7dizLli2jVq1aPHr0iBMnTnD37l1MTU1xcXHh0KFDsqDXvXv3aNGiBa1btyYlJYUDBw7w6tUrFixYwJ07d3Bzc0NLS4uBAwdy9+5dGjVqRN++fUVmpyAIgvDdEQExQRCEf1Dnzp1RVlYuslxVVZVOnTp9dvtOnTrJ7QpZp04d2rRpQ0ZGBhMmTGDQoEF06dKFwMBAWrZsWWjdhw8fsmnTJtauXcvKlSvZt28fY8aMwdfXl/3792Ntbf2/X+AXkkqluLu7Y25uLqsBZWdnx/Tp01m+fDklSpTA3Nyc3377DTU1Nfbs2YO/vz8pKSn8/PPPaGtr/+PnKHz/mjRpQk5ODrm5uTx58uRbn47wFfyxtuLv/f6hgPC/K+gcGR4eTv/+/Zk5cyaGhoYEBASwf/9+QkJCsLCwYPHixaxYsYIhQ4awe/dutLS0GDZsGAYGBnh7e2NnZ0dYWBiXLl2iSZMmHD9+nLi4OMzNzQkICKBZs2b06NEDf3//b33JgiAIggCILpOCIAj/uJcvX+Ln50dUVBQSiYSaNWvSuXNnKleu/EXbS6VSIiIiiIyMRCKRYGRkRI0aNQgJCWHo0KFERETg6uqKvb19kWyJu3fvcubMmSL7TE1NxdzcnCFDhvwt11icpKQk7OzsOHz4MBMmTGDdunWoq6szadIk3N3dKVWqFDVr1sTPz4/SpUvz/v17ateujaamJpmZmTx//hxNTc1//DyF719KSgo6OjooKSmxefPmr1rzTvg28vLyOHz4ME+fPi20vE6dOgwaN
EjuAwfhr4mIiGDBggXs378fAwMDFi5ciLW1NXFxcWzYsIFt27aRn5+Pra0tvXr14tChQ7LM3pEjR+Lk5ES9evVITExk9+7deHh48PTpU8qXL49EIiE+Pp4OHTrg4uJCp06dRJafIAiC8M2IgJggCMJXkpubi0Qi+cs3cFKpFE9PTxwdHTEwMODAgQOYmJgUWS8nJ4f169crLMzfsmVLunXr9pfO5XP8/f2xtrbm48ePeHp6MmDAAPLy8hg3bhze3t6UK1eOMmXKcP36dVn9IUdHRzw9PcnIyGDjxo1Mnjz5Hz1H4d/F2NiY5ORkLC0t8fT0/NanI3wFUqmU6OhowsPDATA0NKRGjRoikPIPe/LkCfPnz+fo0aPUq1ePJUuW0L9/f5KSkvjll1/YuHEj79+/x9raGjs7O65evcqWLVuIi4ujW7duODk50aNHDyQSCbdv38bDw4MDBw6QmZlJyZIlSUlJoUWLFsybNw8LCwvx8xQEQRC+OhEQEwRB+A5JpVICAwO5e/cuiYmJlCpVCjMzM+rUqcPEiRM5cOAAdnZ2uLq6Ksyeio6OxtvbW+ExKlasiIODwz9y/nl5eSxbtoxFixbRqlUr9u7di76+Pjk5OYwaNYqDBw9SuXJllJSU8Pf3l9VaCwwMpEmTJjRs2JDExETCw8P/1o51wr+flZUVV65coWLFiqJYtyB8BXfv3sXFxYULFy7QpEkTli1bRrdu3UhPT2fHjh2sXbuWV69eYWlpybRp03j58iWurq7cv38fY2NjnJycGDVqlCwItn//fjw8PHjw4AGqqqrk5ORQv359lixZQt++fYutrSkIgiAIfyfxiSMIgvAdunjxIsePH+fNmzfk5OSQmJjIhQsXmDZtGmfOnOHAgQNs37692KmEn7up+Keexr969YpOnTqxaNEiXFxc8PPzQ19fn6ysLIYOHcqhQ4cwMDAgOzubCxcuyIJh+fn5TJw4EQMDAwIDA5k/f74IhglFmJmZ8f79e4KCgkhPT//WpyMI/3nNmjXj/Pnz+Pn5oa6uTo8ePWjfvj0BAQE4OTnx/PlzvL29CQ8Pp3379mzfvp3Fixdz/fp1TE1N+emnn6hWrRozZswgOTmZ8ePHc//+fQICAhg3bhxaWloEBwczYMAAatSowZ49e8jLy/vWly0IgiD8AERATBAE4TuTlJTEzZs35Y7VqFGD06dPf1HtrypVqqClpaVw3MjI6H8+R0WOHTuGqakpz58/5/LlyyxatAgVFRUyMzMZMGAAp06dom7duiQkJHDu3Dlq164t29bHx4dbt25RuXJlatas+dU6Xwr/Lk2aNCErK4v8/HwCAwO/9ekIwg+jQ4cO+Pv7c+rUKT5+/Ei7du2wsLDgyZMn2NjYEBQUxJEjR8jMzKRnz55MnjyZAQMGEBYWhp2dHTt27KBmzZoMHjwYf39/GjVqxJYtW4iPj8fb25sGDRrw8uVLRo4cSfny5Vm5ciW5ubnf+rIFQRCE/zAREBMEQfjOhIWFFTv+4cOHL9qPsrKywhphurq6tGjR4k+fmyKZmZn89NNP9OvXj7Zt2/Lo0SPat28PQFpaGr179+by5cuYmZkRERHBqVOnaNy4sWz75ORkZs6ciYWFBf7+/ixcuBBVVdW/7fyE/46C3xtlZWXu37//jc9GEH4sEomEnj178uDBAw4ePMiLFy9o2rQpgwYNIjQ0lH79+nH79m0uX75MxYoVGTZsGN27d6dWrVpERESwefNmnjx5Qtu2bTE3N2fPnj2oqKhgY2PD48ePefr0KdbW1qSlpfHzzz9TsmRJxowZw/v377/1pQuCIAj/QSIgJgiC8J353FSRPzOVpGHDhgwbNgw9PT0A1NTUaNy4MWPHjv3bOjeGhITQvHlzPDw82Lx5M0eOHKFs2bLAp66APXr04Pbt27Rt25b79+/z22+/0aZNm0L7mDt3LtnZ2WRmZlKnTh2sra3/lnMT/nvKlClDjRo1KFeunAiICcI3oqSkx
ODBg3ny5AleXl7cv3+fBg0aMGrUKKKioujYsSPnzp3j/v37mJmZMWHCBExMTPj48SO3b9/m9OnTlCtXjpEjR1K9enWWLFlCfHw8devWZe/evaSkpLBq1Sp0dHTw9vamTJkytGvXjtu3b3/rSxcEQRD+Q0RRfUEQhO/M69ev2bFjh8LxgQMHyu0q+TkFb/d/V+0wqVTKzp07cXR0RF9fH19fX0xNTWXjycnJ9OjRg9DQULp06cKRI0fw9fUtMt3zwYMHmJub89NPP7Fx40YOHDjwRVNChR/XwIEDuXPnDqVKleLp06ff+nQE4YeXlZXFjh07WLp0Ke/evWPcuHG4uLhQtWpV4FPm85o1a/Dx8UFTU5OJEyfi5OREYmIiGzduZNeuXeTn5zN8+HCcnJxo2LChbN/nzp1j+vTpBAUFAaCnp8eMGTMYPXo0pUqV+ibXKwiCIPw3iICYIAjCd0YqlXLw4EFCQ0OLjFWuXBlbW1uUlZW/wZn9nw8fPjB+/HgOHDjAuHHjcHV1LVSvLCEhgW7duvHy5Uv69euHp6cn27dvx87OrtB+8vPzadmyJZmZmejq6pKcnMzDhw9FlzGhWMuXL2fp0qVkZGSQkpKCtrb2tz4lQRCA9PR0fvnlF1auXEl6ejqTJk1i9uzZlCtXDvj0wMfV1RV3d3dyc3MZO3Ys06dPR0dHR5Zl/OrVKzp27IizszO9evWSfd6Fhoby008/cenSJaRSKaqqqgwdOpSJEyfSokWLf6xRjCAIgvDfJe44BEEQvjMSiYSBAwfSvHlzWR0tJSUlGjZsyIgRI755MOz27ds0atSIM2fO4Ovri4eHR6Fg2Js3b+jQoQNxcXGMHj0aT09PVq9eXSQYBuDp6cndu3cZO3YsV69eZcmSJSIYJnyWmZkZGRkZAAQEBHzjsxEEoYCmpiYzZswgMjKSWbNmsX37dgwMDFiwYAEfPnygatWqrFmzhpiYGObOncvBgwcxMjLC0dGRnj17EhkZia+vLxkZGfTt25fatWvj5uZGSkoKderU4cKFC0RFRTFy5Ejy8/PZt28frVq1ol69eri5uZGYmPitXwJBEAThX0RkiAmCIHzHcnJy+PjxI1paWqirq3/Tc8nPz2f16tW4uLhgbm7Ovn37MDAwKLTOy5cv6dy5M+np6Tg4ODBv3jzmzJnDsmXLiuwvMTERY2Nj+vTpQ1hYGLm5udy5c0c85f9Opaen8+LFCyQSCTVq1KBEiRLf7Fzevn1LpUqVUFNTY/ny5UybNu2bnYsgCIq9e/eOVatWsXnzZkqUKMGsWbP46aefZDUs09PT8fT0ZO3atcTExNCrVy9+/vlnWrduzZ07d3Bzc+PXX3+lRIkS2Nra8tNPP1GzZk0AYmNjWb16NVu3biU/P5/8/HxUVVUZMGAAdnZ2tG/fXjxgEQRBEIolAmKCIAjCZ8XFxTFy5EguX77M7NmzWbRoUZEukFFRUXTq1AmpVMr06dNxdHRkwoQJbN68WW6Qq2DK5datW7G2tubs2bN07979a12S8IWkUinXr1/n2rVrsoYOKioqdOjQgdatW3+z8ypoFNG2bVv279//zc5DEITPi42NZdmyZWzfvp1y5coxd+5c7OzsZA96cnJy8PX1ZeXKlTx9+pQ2bdowe/ZsevbsSWxsLFu2bMHd3Z3k5GQsLS1xdnamffv2SCQS4uPjWb9+PZs2bSI7OxttbW2Sk5OpVasW48aNY/To0VSqVOkbvwKCIAjC90gExARBEIRinTlzBhsbG5SVldmzZw+dO3cusk5YWBidO3dGQ0ODefPmMW7cOAYPHszu3bvlPqG/e/cuLVq0YOPGjXh5eaGpqcm1a9dEdth3KCAggBMnTsgd69+/f6Hi11+TpaUljx49QlVVlYiIiG9yDoIg/DlRUVEsWrSI3bt3U61aNRYsWMDIkSNRUVEBPmUinzx5khUrVnD79m0aNGjA7NmzGTJkCNnZ2ezduxdXV1eePn2KqakpTk5OD
Bs2DA0NDZKSknBzc8PV1ZWMjAwMDAyIjo4mLy+PPn36MG7cOLp37/7Nyw4IgiAI3w+RRywIgiDIlZWVxdSpU+nZsyfm5uY8fvxYbjAsODiY9u3bo62tzZo1a3BwcKB79+54e3vLDYbl5eUxceJEGjVqRKVKlQgICGDp0qUiGPadunXrlsKxmzdvfsUzKaxJkyYkJyfz/PlzkpOTv9l5CILw5QwMDPD29iYoKAhzc3PGjh2LiYkJBw8eJD8/HyUlJSwtLbl58yZXrlyhatWqDB8+HGNjY3x8fBgxYgRBQUFcuHABPT09xo4di76+PgsWLCA7O5tFixYRExPDwoULSUxMJDc3F3Nzc0JCQujVqxcGBgYsXLiQmJiYb/1SCIIgCN8BkSEmCIIgFBEeHo6VlRVPnjxh1apVODk5yQ1uBQYG0rVrV6pUqcKGDRsYMGCArOC+ohpT7u7uTJgwgRs3bjB+/HgqVqzIxYsX/+lLEv4HeXl5LF26tNh1zp07R/Xq1TE2NsbExIQaNWpQuXJlKlasWGRa7d/pxIkTssBqt27daNCgAU2aNBEdJwXhXyQgIAAXFxfOnDmDqakpS5cupVevXoUekAQGBrJy5Up+/fVXypUrh7OzMxMnTkRHR4ewsDA2btyIt7c32dnZWFlZ4ezsjJmZGampqWzbto01a9YQHx9P9+7d0dbW5syZM6SlpdG9e3fs7Ozo06fPP/peJQiCIHy/REBMEARBKGT37t1MnDiRSpUq4evrS5MmTeSud/fuXbp3746hoSG//PILvXv3pnr16ly6dIlSpUrJ3SYhIYHatWszYMAAunTpwrBhw7h58yYtW7b8Jy9J+B9JpVJWr15NZmam3PH09HTWrFmDoq8SmpqalClThipVqlC9enVq1qxJlSpVqFy5cqF/Cgps/xnHjh0jMDCw0DJ1dXVGjBghqy8mCMK/g7+/P3PnzuXatWu0bNmSZcuW0bFjx0LrREREsHbtWry8vNDQ0GDChAk4OztTqVIl3r9/j6enJ5s2bSI6Opq2bdvi5ORE3759ycnJwdPTk1WrVvH69Wv69etHw4YNOXv2LHfu3KFChQqMHj2acePGYWRk9I1eAUEQBOFbEAExQRAEAYCPHz8yceJE9uzZw6hRo9i8ebPCbBt/f3969uxJgwYN2LZtGz179qRkyZJcu3aNcuXKKTyGra0tR44cITg4mA4dOmBkZMTJkyf/qUsS/gZnzpzh7t27csdatWpFly5dePPmDeHh4Tx69Ijg4GDCw8OJiYnh7du3fPz4Uba+RCKRGzzT0tKiSpUqVK1atUiw7Pf/6OjoIJFIuHfvHqdPn5Z7Trq6ukyePFl0lxOEfxmpVMqFCxeYO3cu9+/fp0uXLixbtoxmzZoVWi8uLg5XV1e2bt1KdnY2Y8aMYfr06dSqVYvc3FyOHz+Oq6sr169fp3r16vz000/Y2tpSokQJfHx8WLFiBS9evMDS0hIrKytu377N7t27SU5OpkOHDtjZ2TFgwAA0NDS+0SshCIIgfC0iICYIgiBw//59rKysePv2LVu3bmXEiBEK17106RKWlpY0b94cT09PevToQXZ2Nv7+/lStWlXhdrdu3aJVq1a4u7ujrq7OmDFjePDgAWZmZv/EJQl/k8zMTPbs2cPr168LLa9WrRojRoxATU2t2O3T09N58eIFkZGRREZGEhERQVhYGBEREbx8+ZLs7GzZuhoaGqipqSGVSsnKyio0VjBuYGDA4MGDiw14jRkzBn19/f/hagVB+NakUilHjx5l3rx5BAcHY2lpyZIlS4o08Hj//j1btmzB1dWVxMREhg4dyqxZszA1NQU+Tcd0c3Nj//79qKmpMXr0aBwdHTEwMGDfvn0sX76csLAwunfvzsyZM4mLi2PHjh1cuXIFXV1dRo4ciZ2dHSYmJkXO8e3btzx//hyJRIKxsTFly5b9Kq+N8GORSqU8evSIe/fukZSUROnSpWnatClmZmai7qog/E1EQ
EwQBOEHlp+fz4YNG/j5559p2LAhvr6+GBoaKlz/zJkz9O/fn44dO7Jz50569uxJXFwc/v7+xW5XUNhYRUWFa9euUb9+fRo3bsxvv/32T1yW8DfLy8vjyZMnPH/+HABDQ0NMTEz+crc2qVTKmzdvZMGy58+fy/4dERFBfHy8bF2JRIKysjLNmzena9euxe73zZs3GBkZ0ahRI0xNTcXNqiD8C+Xl5eHr68v8+fOJiorCysqKRYsWFZnWmJGRgZeXF2vWrOHFixdYWFgwe/Zs2rZti0Qi4c2bN2zdupWtW7eSkJBAr169cHZ2pkOHDhw6dIhly5YRFBREhw4dmDdvHnp6enh6euLt7U18fDwtWrRg3LhxDB06FE1NTU6cOFFkunaLFi3o1q2bCFIIf6uLFy9y48aNIsubNWuGhYXFX95/Xl4eSkpK4vdW+KGJgJggCMIP6u3bt4wePZqzZ88ybdo0li9fXmy2z5EjRxg6dCg9e/bEy8sLS0tLgoODuXr1Kg0aNCj2WJs3b8bR0ZE7d+4QEBDAhAkTePz4sdwn78J/m1Qq5f3798TExBT6Jzo6mpiYGF68eEFcXJzC7Xv37k3Tpk2L3f/x48cJCQmR1T7T09PD1NSURo0ayYJktWrVEtMqBeFfICcnBy8vLxYvXsybN28YM2YM8+bNK5IFmpuby4EDB1i5ciVBQUG0atWK2bNn06tXL5SUlMjMzMTX1xdXV1cePXpE/fr1cXJywtramvPnz7N06VICAgJo1aoVLi4udO7cmZMnT+Lh4cG5c+fQ0tLC3t5eYY3MPn36iIznH0xycjKRkZFIJBJq1aqFjo7O37bvpKQkNm3apHB8woQJVKhQ4X/ad3h4OFevXuX169eoqqpSr149OnfuLJrSCD8kERATBEH4AV24cIGRI0eSn5+Pj4/PZ580+vr6MmLECAYOHMjOnTsZNGgQ169f5+LFi7Ro0aLYbd++fUvt2rWxsrLC1dUVQ0ND2rdvz969e//OSxK+wPv373n06BEfPnygbNmyNGrUCC0trb/1GDk5Obx+/bpQsOvFixc8f/6cFy9eEBsbW6hIf0Hml1QqJS8v77P7b926dbEZYlFRUfj4+ACgo6MjK9qflZXFmzdvSExMBD7VLWvYsKEsQNaoUSMaNGjwPxX4FwThn5eZmcnWrVtZvnw5KSkpODg4MGfOHCpWrFhoPalUyqlTp1ixYgU3b97ExMSEWbNmMXToUFRVVZFKpVy9ehU3NzeOHTtGmTJlsLe3Z+LEiTx+/JglS5bw+PFjBgwYQP369dHU1KR8+fI8e/aMtLQ0SpYsKff8KlWqxPjx47/GSyF8Y1KplHPnznHnzh3ZMolEQps2bejYsePfknF1584dzp49q3C8c+fOtGnT5k/v9+nTp/z6669FlpcuXRp7e3uFHcIF4b9KBMQEQRB+IDk5Obi4uLB69Wq6dOnC7t27qVSpUrHbeHt7Y2try4gRI9i+fTujRo3i6NGjnDp1ii5dunz2mDY2Npw6dYpnz56xZ88epk6dSkhICMbGxn/XZQlfICgoiKNHjxYKOqmpqWFlZYWBgcFnt8/MzOTGjRsEBQWRmZmJhoYGOTk5hIeH8/z5c169esWbN2/48OFDoe2UlJTIz88vsj9FBfY/p2TJkjg6OsrNZszMzGTdunWsWbOGGjVq8OTJEx4/fsyTJ08ICwuTnUflypUpU6YMEomE9+/fExsbS35+PkpKSoWmWhZklFWqVElMKRGE78THjx9xc3Nj7dq15OTk4OTkxIwZM9DV1S2yrr+/PytWrOD06dNUr16d6dOnM3bsWFngOzIykk2bNuHp6UlGRgaDBw/GwcGBW7duFemuq6GhobDjLnx6P/3555//3osVvkvFBav69u1Lo0aNFG6bnZ1NbGwsysrKVKlSRWHpgVu3bnH+/HmF++nYsSPt2rX7U+ctlUrZuHEj79+/lzvevn17OnTo8Kf2KQj/diIgJgiC8IOIjIxk2
LBhBAQEsHTpUmbMmPHZKWPu7u5MmDABe3t7tmzZgoODAzt37uTQoUP079//s8e8fv067dq1w8PDg2HDhlGrVi169uzJzp07/67LEr5ASkoKGzdulJuBpampiZOTE2pqauTk5PDy5UuePn1KUFAQYWFhvHjxgvj4eDp06ED58uULbZubm8uePXt48eIF8Cn4VapUKXR0dFBXVyc7O5vU1FTev39Pbm5ukWPr6OhQoUIFlJWViY6OJiMjQ2EA7fcMDQ0ZPHgw6urqsmXv379n9+7dpKenk52dzZEjR+jTp49sPCMjg5CQEFmArODfb9++BUBdXZ1KlSpRokQJsrOziYuLIyMjA4Dy5csXCpKZmppSu3ZtVFVVv+wHIAjC3y4pKYm1a9fi5uaGqqoqM2bMwMnJSW4G16NHj1i9ejW+vr6ULVsWJycnJk6cKAuipaSk4O3tzcaNGzEyMlKY+VxcIF9JSYlp06aJLNN/qezsbKRSaaHPFUU2btxIcnKy3DFFmYJSqZSbN29y7do1WcMYbW1tevbsSZ06dYqsHx8fz9atWxWew7hx44ptZCRPYmIimzdvVjherVo1xo4d+6f2KQj/diIgJgiC8APw9fVl/PjxlC1blv3799O8efNC4+np6QQFBZGSkkK5cuWoV68eW7ZsYcqUKTg6OrJhwwZmz57NmjVr8Pb2xsbG5rPHzM3NxczMDE1NTW7evMnatWtxcXEhLCyMGjVq/ENXKsjj7+/PpUuXFI6fOXOGgIAAcnJy5I63b9+ejh07yh2TSqWEhoYSERFBdHQ0iYmJRW4Yy5QpQ61atWjSpAm1a9cmISGB27dvc/36dXJycmRBsIJ/f0n2mIaGBvXr10dbW5v4+HiePXuGqqoqmZmZlClThtTUVE6fPk3nzp2L3U98fHyhANnjx48JDg6WZYLo6OhQpkwZlJSUeP/+vWzKpbq6OiYmJoWCZKampn9rDRlBED7vzZs3rFixAnd3d3R0dJgzZw4ODg5oaGgUWTcyMpK1a9eyc+dO1NTUcHBwwNnZmSpVqgCfioyvWrVK4XthwfuUPKdPnyYmJoa5c+diZ2f3RYEV4dt7/fo1Fy5cIDo6GvhUc7JLly5Ur15d7vr5+fksWbJE4f5UVVWZM2dOkeX379/n1KlTRZZLJBLGjBlDtWrViowdO3asSAMHgPr16zNw4MBCmcu5ubm8ePGC7Oxs9PT05Na6+1xdMn19fcaMGaNw/I8SEhK4efMmr169Qk1NDRMTE1kDJUH4txABMUEQhP+wtLQ0fvrpJ7y8vLCyspLdMPxeaGgohw8fLnIDsG3bNkaNGsWKFStYuXIlc+bMwdXVFScnpy86tqurK1OnTuX+/fsYGhpiYGDA0KFD2bJly992fcKXOX36NPfu3VM4fvv2beLi4qhatSo1atTA2NiY+vXrY2BgQKVKldi3bx+xsbEKt9+0aRNJSUno6upSo0YNGjVqRLt27WjSpAmGhoZERkZy5MgR9u/fT3BwcKFty5cvT0pKCllZWYD8KZaKpiqpqanJnrTDpxsRXV1d4uPjKV++PKmpqVy8eJFWrVp90etUIC8vj+fPnxfJJivosqmkpES5cuVktckSEhJkGXAF1//7jLLq1auLKZeC8A+LiYlh8eLFeHt7U7lyZebPn8/o0aPlZnK+ffsWV1dXtmzZQmZmJjY2NsycORNDQ0PWrl1LWlqawuO8f/+e0qVLF1pmbm6OsbExS5YsYc+ePVStWpX58+djY2MjMkm/Y2/evMHT07NIBrOysjI2NjZyg1QA69ev5+PHj3LHypYty+TJkwst+9xUxTp16jB06NAiy/Pz87G3t6dcuXKUKFECLS0tmjZtStu2bQtNtQwNDeXEiROkp6cDn4JsZmZmWFhYFFpPKpWyZcsW3r17J/c8/kxdspiYGHbv3l3ktatRowYjRoz4y12oBeFrEQExQRCEf7m8vDzCw8N59+4dOjo61K5dGzU1NQIDA7GysuLly
5ds2rSJMWPGFLkpT0lJYdOmTXKnswHMnTsXDw8PJk6cyMKFC1mwYMEXnVNcXBy1a9dm5MiR/PLLLyxZsoRly5bx/PnzP53iL/x19+7d4/Tp0wrHhw0bVmxNN3d3d9nUQnksLCxo2rSpLHMiNzcXf39/9u3bx9GjR0lISJCtq62tjampKYmJiYSEhMiywXR0dIrUHytbtiyZmZmym9NSpUqRkpIiG/9jQKxSpUq8efMGDQ0NsrOz0dHRIS8vDz8/v7+l+1tqairBwcFFAmVJSUnAp8Cdrq4uEomEDx8+yM5bR0dHlkFWECyrV6+e3AwWQRD+mrCwMBYsWICvry+1atVi0aJFWFlZyb1B//DhA+7u7mzYsIGEhAQGDRpE165def36tdx9v3v3jry8PLp06YKfnx+BgYEkJiYybtw4xo8fj5aWFqGhoSxcuJADBw5Qs2ZNFi5ciLW1tQgQfIcOHDhAaGio3DEDAwNGjRold+zKlStcvXpV7li3bt1o2bJloWXp6emsWbNG4XmUKlWKKVOmFFkeHh6OsbExnp6ejBw5EhUVlSLf4+Li4tixY4fcUgOtWrUq0oTm9u3bnD59usjvY4UKFRg7duwXZTZKpVLc3d2Jj4+XO96rV69iu0ELwvdEBMQEQRD+xRISEti/f3+hWhaampooKSkxZ84c6tati6+vr9z6FADXrl3Dz89P4f4rV66Mg4ODbNrkl2a5jBgxgvPnz/Ps2TOkUikGBgaMHTuWDRs2/LkLFP4WmZmZbNq0Sfb0+PfKli3LxIkTi60nd+bMGe7evSt3TEtLiylTppCamsqpU6fw8vLi+vXrsowviURC7dq16dWrF2lpaRw6dEj2dFpTU5Nq1aoRFhZWaIqksrIyEyZMwN3dXRasbdeuHdeuXZOtU6VKFVnWmrKysqw+WteuXblz5w4pKSloaGigoqKCmpoa169fp169en/mZfsiUqmU2NjYItMuQ0JCZFmXpUqVkmWTFfytKikpUa9evUJBMlNT0yJ12gRB+N88fvyYefPmcfz4cUxMTFiyZAl9+/aV+zmWkZGBj48Pq1evJjU1lYkTJyr8vNu9ezdRUVEMHTqUYcOGcfToUXbt2kXp0qWZOnUqkyZNolSpUjx+/Jj58+dz7Ngx6taty6JFixg4cOBna3cKX8/KlStln1V/JJFImDdvntzfg7y8PA4dOlQkmNawYUP69u1b5Gecm5vLihUrFNbHrFixIg4ODkWWOzs7s3fvXl6+fKnwAYqiaZXw6aHR9OnTZVmK2dnZtG/fnpycHCZPnsybN29QU1Ojfv36tGvX7os7TH6uFlnNmjUZOXLkF+1LEL41McFXEAThXyovL69IMAw+PYnMzMzEwcGBVatWFZuFoqgobIFdu3ZhY2PD+vXrvzgYduXKFfbu3YuXlxe6urrMnTuX3NxcZs+e/UXbC38/DQ0NrK2tOXjwYKEMq/Lly2NlZfXZG7RWrVrx5MkTWZH530tPT6dhw4aEhITIglqampr07t2bnj17kp+fT1BQEL/++isxMTEANG7cmLJly+Ln58ezZ88K7a979+7069ePCRMmyJaNGzcOT0/PQuuNHj2a5cuXA1C1alXZvocNG8b9+/dlUxo/fPhAeno6nTp14ubNm9SsWfNLX7YvIpFIqFq1KlWrVqVHjx6y5Tk5OTx79qxQoOzJkyeF/uZiY2OJi4vj4MGDsuBZ5cqVi0y5NDQ0FNklgvAnNWzYkGPHjnH79m1cXFzo378/5ubmLF26lK5duyKRSMjIyCAgIICXL19SoUIFTpw4wb1792T1pOTZtGkTkZGRrFmzhv3799OzZ0/27t2Ln58fCxcuZPXq1Tg5OeHo6MjRo0e5d+8e8+fPZ8iQIZiamrJ48WL69OkjplF/B4p7Xy3uc1FZWZkhQ4YQExODk5MTjRo1wtbWFj09Pbk/VxUVFerXr8+TJ0/k7q9hw4ZFlqWmpuLl5cWkSZOK/R5XXPZ2dnY2ycnJV
KhQAYDp06fz4MED/P39adasmcLtCo6fkpKCrq5ukUCZolkFXzoulUrJzc2Vm/EmCF+byBATBEH4lwoNDeXAgQMKx7t27frZ2klXr17lypUrCsdfvHiBh4fHFxdIzcnJoVGjRpQuXZrLly/z4cMHatasyeTJk1m5cuUX7UP45+Tl5REREcGHDx8oV64cBgYGX/xlNCEhgTNnzhAVFQV8mm57+fJl2ZNpAwMD+vfvz7Bhw6hUqRJeXl5FnoZnZGSQlJTErl27ijyVL1euHBcvXsTX17fQ74qzszO//vqrbAqTiooKUqmUc+fO0aVLF+BT3ZNLly6hoqKCo6MjTZo0Yfjw4SgpKeHg4CALBFaoUIFbt26hp6f3P71+f4f3798TFBRUZNplQT0aDQ0NWafLgimXJUqUoGHDhoWCZA0aNJDbTU8QBPkuX77M3LlzuX37Nu3bt2fu3LmF/vYKVK9evdiAWEG9p5ycHA4ePMjKlSsJCgqiVatW2NnZERgYyPbt21FRUWHSpElMmTKFChUq4O/vj4uLC1evXi0SmBP+vIyMDPz9/QkODiY7Oxt9fX3atm37p8oyHD9+nIcPH8odq1+/PoMGDfrsPsqUKcOsWbOYNWtWseulpqbi7e0ta8xSoFatWlhZWRX5nrVlyxYcHR2JiopSWMsMYN++fYSHhyscnz59OlpaWvj6+jJs2DB++eUXJk6cWOx5njx5UvawSklJiYYNG2JhYYGamhrw6bvE+vXr5WadA7Rt25ZOnToVWS6VSgkICODmzZskJSWhrq6OqakpnTp1Ek0ohG9GBMQEQRD+pT7XObBx48ZYWloWu48PHz6wadMm2XSz38vKymLu3LloaWl98TmtWbOG8+fP06lTJ7Kzs8nPz+f69escOHBATAX7juTk5PDkyRNevnyJmpoa9erVU9hRKyMjg4MHD7Jz507ZtEk1NTWysrJo3rw5NjY29O3bl7JlyxIUFMTixYvR0NCgVq1acvd38OBBnj17JvudU1JSYt26dfz0008MHTqU3377TVZY38nJCQ0NDVatWgUgqzfWo0cP7OzsGDhwIAATJ05ky5YtSCQSGjduzP379xk0aBBnzpwhLy+P48ePM2bMGOLj49HX1+f27duyJ+bfA6lUSnR0dJFpl2FhYbLXSUtLCyUlJVJTU2WZeIaGhjRu3LhQp8uqVauKG2xBUEAqlXLq1Cnmzp2Lqampwvep4mRnZ2Nvby97z5RKpZw+fZqVK1fi7+9P/fr1cXBw4MWLF2zbto28vDwcHByYMWMGlSpV4vLly8ybN49bt27Rtm1blixZQvv27f/uS/1Py8rKYufOnUVqWCkpKTF8+PAvzgROSUlhx44dRYKimpqa2NraUqZMmc/uQ11dnXXr1hUppC9PdnY2I0eORE9Pj5YtW1KnTh3q1asny0aTSqVERkby+PFjzpw5g4qKCqtXry724cfTp0/59ddf5Y4ZGxszbNgwnj59SrNmzejXrx+7d+9W+BmRl5fHjh07ePPmjcJ9wafs5okTJ9K4ceMi65UsWZLx48fLPWdFZTr09PQYPXq0yIQWvgkREBMEQfiXevz4MUeOHFE43r59ezp06PDZ/YSEhHD48OFCKe5ZWVmMHTsWQ0PDLz6fly9fMn/+fGrUqFFkzMzMjD59+nzxvoR/TkpKCj4+PrJC8AWaNGlCr169kEgkvH79mk2bNnH48GEiIiJkAZjKlStjYWGBnZ0dzZo1Q0lJiaysLPbs2cOKFSt4/vw5pUqVYurUqQqPHx4ezt69e4FPmV1Hjx5FVVWVVq1aERAQgKqqKjk5Ofz000+MHz8eExMT2bYFRfNv3brF/fv3+emnn4BPT9ILnnhLJBKSk5PJysqifv36pKen07JlS3bs2EGHDh14+fIlRkZG3Lp1C11d3b/1tf27ZWZmEhISUiRQVnCzoqysTIkSJcjKypJNudTV1aVx48aFpl3WrVtXdLoThN/58OEDrq6uCscVdbaVS
qXs2bOHqKgoBgwYwJQpU2jZsqUswODv78+qVas4efIk+vr6TJgwgQ8fPrB161YyMzOxtbVl1qxZVKtWjTNnzuDi4sLDhw/p0qULS5YsoUWLFv/UJf+nFPdAsHz58kyYMOGLHwx8/PgRf39/wsLCiIqKokqVKowfP75IR255cnNzUVVVZefOnYwZM+aLjqWrq8vmzZuL1AyTSqWcOXOmSEfoEiVKMHLkSCpXrix3n1KplGXLlhV5sFm2bFlGjRqFRCKhWbNmKCsrc+fOnWIfchYXXAMYP348SUlJWFhYkJ+fz9atW2VZ5/DpIU2PHj0oW7ZskW3T09NZv3693AewAAMGDKBBgwYKjy0I/xRRQ0wQBOFfqk6dOpQoUUJuXSclJSVMTU2/aD9169bF2dmZK1euyNqPb9++nUqVKv2p81m0aJHcYBhAQEAALVq0EFli34ETJ04UCYYBPHjwgMOHD3Ps2DHZuKqqKo0bN8ba2prRo0cX+pIbFRXFwoULOXjwoOzG0czMjClTpvD8+XOFx9fW1qZMmTJcvnwZU1NTEhISqF27NrGxsbLf58mTJ+Pm5ibL4pJIJCgrKxMfH0+lSpVo3rw5x44dk+3T2NgYTU1N0tPTkUql3Lhxg549e7Jt2zYGDhzIpUuXuHnzJrdv36Zdu3aEhYXRrl07bt68iba29t/yuv4TNDQ0aNy4cZGn8O/evSsSJAsKCiIjI4Pk5GT8/f25efOm7OeioqJCvXr1MDMzkwXJTE1Nv/uAoCD8UxQVUi9QsmRJtLS0Ck1vU1JSolevXkyfPp1du3bh5uZG69atMTc3x9nZmUGDBtGmTRvatGnDkydPWL16NS4uLujq6jJp0iQAtm3bxvbt27GxseHnn3/mwYMHHD16lHnz5tGyZUt69+7N4sWL5WbeCP8nLCxM4VhCQgLv37//4vc3bW1tLCwssLCwoGHDhrRq1eqLgmGAbMqgpqamrF6XtrY2mpqactf39/cnLy+Pjh07Fhl7/vx5kWAYfMrSPnbsGOPHj5cb5IuMjGTZsmVMnjyZjh07kpOTQ7Vq1WjQoAEqKioMGzaMV69ecf/+/c9m/L969arYcT8/PyZOnEi1atU4ffo0enp6SKVSPn78iJqaWrG1zmJiYhQGwwquQwTEhG9BBMQEQRD+pdTU1Bg8eDC+vr5kZ2fLlispKWFpafmnbnZTUlJwcnICPn1h+7PBsEuXLhEbG1tsnYuIiAgREPvGPn78SEREhMLxgizB/v37M2HCBDp37lyosHBeXh6//fYbixcvJjg4GPh04zh58mRmzZqFnp4eGRkZrF27VmE3rapVq/Lu3TskEgnBwcG0aNGC1NRUdHR0+PDhA5MmTWLTpk3Y2trKulFKpVJatWrFtWvXmDx5MhKJRFbLDKB06dLUq1eP+/fvo6qqytWrV+nZsycDBgxg+PDhHDx4EEdHR8LDw2VBsaCgIFlQ7Es7a30vypUrR8eOHQvdVOXl5REZGVkoUBYYGEhUVBS5ubk8fvyYZ8+esWvXLtnPpnLlyjRt2lSWTdaoUSNq1KghuuAJ/3llypRRmAUGcPHiRcqVK0efPn0oUaIEmpqamJiYyAIlEydOxMHBgbNnz+Lq6srw4cOZMWMGkyZNwt7engYNGrB7926WLFnCunXrWL9+PcrKyowZMwYdHR127NiBt7c31tbWzJkzh0ePHnHw4EEWLFiAmZkZAwcOZNGiRdSvX/9rviz/Goo+X750vMDLly+5evUq0dHRqKio0LVrV7lBKUXS0tJQVlYmKSmJNWvWkJubi0QioU6dOvTs2bPItMErV65QqVIljI2Ni+zr8ePHCo/z9u1b4uPjqVixYqHlUqmU8ePHU7FiRRYsWFDkeBs2bCAtLY158+bx6NEjlJSUMDQ0VJg997k6XmvWrKFx48YcOXJE9rcgkUgoVapUsdtB8U0KCvYjCN+CmDIpCILwL5eWlkZgYCCJiYmUKlXqs5kfb9++JSAggA8fP
qCrq4uhoSH9+/cnMTGRGzduKMzyUiQ7O5uGDRvStm3bYouVd+vWjZYtW/6pfQt/r7i4OLZv365wXFdXF0dHxyLL3759y4IFC9izZ4+s0Hvjxo2ZN28elpaWRep+FFeoeOzYsbLpQpaWluTm5lK+fHkSEhKYOHEiv/zyC/7+/rRt2xb4lN2krq5O6dKliYuLIykpCR0dHVq0aMGdO3eAT9Mw3dzc2Lx5MxKJhCZNmshuapKTk6lXrx4JCQnY2tqybds2Pn78SPv27Xn48CHm5ub4+/vLigX/16SlpREcHCwLlD1+/JjAwEDev38PfJp2KZFIZMHQggL+BYEyU1NTTExM/nVBQ0H4HEVNZZSVlcnLy8PT05OXL19iamqKvb09w4cPV5g5FBQUxMaNG9m9ezcAI0eOxMnJSRbQio+PZ9OmTWzevJm0tDSGDRtGtWrV8Pb2JjY2lsGDBzN37lzq1avHnj17WLRoEdHR0VhbW7NgwQKMjIz+sdfh3+jy5ctcv35d7ljp0qVxdHT8bIAlKiqKPXv2FAmevX//nsWLF39R9vDz58+ZOXOm3C6RFSpUwN7evtDnY7NmzTA0NGTfvn1F1t+7d2+xD6xsbGyKfD/z8fFh9OjRnDlzplCXY4Bjx47x4MGDIp/PrVu3ljWk+aN3797xyy+/yB3LysoiOjqanTt3/k8F8CMjI/Hy8lLYpMnKyoratWv/6f0Kwl8lHgEKgiD8y2lpadG6dWssLS3p0KFDscGwBw8e4O7uzt27d3n27Bm3b99m165daGlpceHChT8dDINPTyAjIiIYMmRIsev9mXpkwj9DV1e32KK1VapUkf13QfHpJk2aUKlSJbZt24ZEImHChAm8fv2agIAA+vfvX2R/cXFxTJs2jadPnxZarqqqSr9+/ahWrRqbNm2iV69esmMmJCQwYcIEfvnlF7Kzs+nZsyfw6Wl1bm4ukyZN4vXr13To0EF2QxobGyvbd+nSpWnTpo3svAMCAkhNTZVd886dO8nLy2P79u3cunULbW1tbty4QfPmzbl37x4dOnQgPj6ey5cvc+zYMW7cuCEL/P3baWlp0axZM2xtbXFzc8PPz4+kpCRiY2M5d+4cK1euxNramvr166OiokJGRgZ37txh27ZtslpxWlpa1KpVi6FDh7J69WrOnTvH27dvv/WlCcJf0q5dO9q0aVPoBl1XV5fhw4ezYMECoqKiOHXqFAYGBjg6OlK5cmXGjBnDrVu3+GM+gYmJCdu3b+fly5fMmzePkydPYmJiQrdu3Th9+jTlypVjyZIlxMTEsGLFCi5evMjy5csxNzdn9uzZ3L17F1NTUwYNGoSJiQnPnj1jy5YtXLlyhbp162Jra8uLFy++8iv0/WrRooXC4OSXdO4s6FQsL5OsdOnSnDlz5ovO482bN3KDYfApCPr7z8EPHz7w4MEDudMlAYU1wuBTkPaPjWDi4+OZOnUq1tbWRYJhsbGx3L59W+7n/Y0bN2Rdm//o2LFjcmuz5efnI5VK2b17958OhhVsZ2Zmxq1bt+SuY2RkJDdrThC+BpEhJgiC8IN4//49mzZtkvsFUElJialTp35xR8mCj46XL19St25dHBwcWLt2LXv27CEyMrLI+qKo/vfj1KlT3L9/X+7YmDFjKF26NPPnz8fb21tWKNfU1JT58+fTr18/8vPziYmJIScnBz09PdnvTH5+PuPGjcPLywv4NP1hwYIF9OrVC3V1dYyMjNDQ0GDy5Mn88ssvslpi0dHRjB8/Hnd3dwAsLS05ceIE8OkmoEyZMrRu3ZqjR49y/fp1WeBLS0tLVr8lOzub6OhojIyMUFNTIzs7m/Pnz9O1a1fZtRWcm5GREU+ePJEV7+/UqROpqalYWloWmtKhrq7OsGHDFHbf/C/KyckhPDxclk0WGBjIw4cPiYuLAz79TCUSiew9pHTp0jRs2JAWLVrIplwaGxuLTmHCv0pmZiZxcXGoqalRuXJluVO7YmNj8fLyYseOH
bx48YL69etjb2/PiBEj5HYizM7O5tdff2XDhg08ePAAY2NjnJycsLGxQUtLS9aMZNWqVYSHh9OxY0eaNGnCsWPHCA8Pp0ePHri4uNCkSRO2bdvG8uXLSU5Oxs7Ojjlz5lC1atWv8dJ811JSUrh48SJPnz4lLy+P9+/fEx4ezokTJz4bEPv48SPr169XOK6urs7s2bM/ew6HDh2SlQ+Qx9zcXPaA5+TJk/Tp04fw8HC5Dwg/fvzIhg0bigRb4VNmmYWFRaFlw4cP5+zZs4SEhBQKluXl5TFs2LBip9u2bNmSbt26FVq2ZcsWJk2axIQJE5g+fTr379/nwYMHPHv2jLZt2zJt2jTy8/N5/vy5rOZn3bp1i82kS0hIwMHBgcOHDzNixAg2btxIQkICN27cIDY2VvZZoqenR9u2bcWDU+GbEAExQRCEH8T169e5fPmywnELCwuaNWtW7D7evn2Ln5+fLK0/OTmZc+fOcfPmTUqVKkVOTg5Xrlzh4cOHZGRkUKpUKZo1a0bLli1FXaJvKCsri4CAAF68eIFEIiE1NbXQE2JVVVVq1qyJm5sbd+7cQSqVUrJkSaysrFi6dKmsbklwcDCnT5+WBaKUlJRo0aIFGRkZDB48WFbLrlmzZpw7d47SpUvLjpGfn4+FhQXnz5+nevXqqKio8Pz5c+zt7RkzZgxBQUEkJCRw//59bt26hZKSEq9fv2bfvn2MHj0aHR0d3r59i0QiISMjA01NTVmx/ZycHKRSKerq6qiqqpKVlcXs2bNZunSp7PgpKSnUqVOHuLg4Vq5cyaxZs4BPU0Q2bdok9/dTS0sLZ2dnhVM8fhQfPnwgKCiIJ0+e8OjRI+7fv8/Tp09lvwcSiUR2E6eiooKhoSHNmzfH3NycRo0a0bBhw++6eYEgfKn8/HwuXrzI9u3bOXbsGMrKygwePBh7e3vatGlTJBAjlUq5efMmrq6uHD58mFKlSmFvb8+kSZPQ19cnLy+Po0ePsmLFCh48eECTJk1o27Yt58+f5+nTp3Ts2BEXFxeaNWvGL7/8wqpVq0hPT2fixInMnj27SNbQv8379+/x9/cnKioKJSUljI2Nad26tcKi9PLk5+eTn5+Pv78/HTt25LfffmPAgAHFbpOSksKGDRuK3eeiRYs+e+wDBw4QGhqqcPz30xOnTZvGwYMHiYmJkRuwk0qltG3blkaNGsnqrSopKdGkSRO6d+9e6EHDmTNn6NmzJz4+PowaNarQflxcXDhx4kSxr8EfH1Ju2LCBqVOn4uzszPr160lJSWHAgAH4+/uzZ88eBg8eTFpaGvv27SuUna2srEzPnj0xMzMrcozjx49jZ2dHXl6erMENfKpV6uXlVWg/Bb6002R6ejrR0dFIJBJq1KhRbDF/QfgcERATBEH4QZw9e1ZWc0medu3aKUzlh0/BsJ07dxYq4A+fbobt7OwKpftLpVJyc3NRUVERhVK/sZSUFLy9vUlOTi60XE9Pjzp16nD69Gm8vLxkWUAmJiYsWLCAgQMHFvrZxcTE4O3tLffp9eXLl7l27RolS5bk9OnTsvpfBdLT02nSpAmhoaG0atWKhIQEwsPDsbOzo127dkW6Uubl5eHr60t+fj4zZszAwcEBFxcXlixZAnyqRVKrVi2UlJTQ1NTk48ePANSqVYvo6Gjy8vJo2rRpkeLIfn5+dOrUCVVVVcLDw6levTpXrlzh6tWrCl+/wYMHU69evc+9zD8cqVTKy5cvZdlk9+/fJyAggJiYGLlZqBUqVMDU1JRWrVrRpEkTTE1NqVatmnh/EP613r59i7e3Nx4eHjx//pw6depgZ2fHqFGjKFeuXJH1o6Oj2bx5Mx4eHqSmpjJw4ECcnZ1p0aIF8Kk5zcqVK7l06RJGRkZ06dKFmzdv8ujRI1q1asW8efNo0aIFbm5urF+/nry8PBwdHZk+fbrcLLXv3bt379i5c2eRTtllypTB1tb2TwXFCnTv3p2YmBiePHlS7IMMqVSKu7s78
fHxcsdfvXqFh4fHZ4935MgR7t69qzAgY29vL/tuZGZmhomJCbt27ZK77uXLl+ncuTMAN2/epFKlSlSsWLHI65CamoqJiQnGxsacO3eu0HvoqVOn6N27N8uXLy/yXe33LC0tZZ1Mly9fzty5c5k9ezbLly8nNjaWnj17EhMTw7Fjx2jXrh0A+/btIzw8/LPXmZKSgrOzM15eXvTp06dI1/IHDx5w8uRJufvR0tJiypQpCrOMpVIp165d4/r167KOlaqqqnTu3JnmzZsrvF5BKI54XC8IgvCD+NyT5D92L/qjS5cuyf2CJZVKi9SckEgkqKqqipvd78DZs2eLBMPg0xf+gi/AKSkpjB49mjdv3vDkyRMGDRpU5Gcnr25OgRYtWjBz5kw+fPhQJBj26tUrqlevTmhoKCNHjiQxMZHw8HDGjRvHxIkTiwTD4NNT5969e+Pt7c26detQUlJi+vTpsvHfP1n+/Y2IiYkJeXl5KCkpybIUf69jx46MHz+enJwcRo8eLWsXX5zPjf+oJBIJ+vr69OrVi59//pnffvuNqKgo0tPTCQwMZPfu3UydOpXWrVtTunRp4uPjuXDhAosWLcLS0pLq1aujpaUlK1ju7e1NYGBgsTdxgvA9qVixIrNmzSIsLIxLly5hamrK7NmzqVq1KsOGDcPPz6/Qe2b16tVZs2YNr169ws3NjYcPH9KqVStatGiBr68v7du35+LFi9y9e5eGDRvKAjZjx44lOzsbCwsLunbtSqNGjXj+/DmOjo64ublhYGDAokWLSElJ+Yavxp934cKFIu/RAElJSQoL5isSERHB/v376datG2ZmZmzbtq3Y9SUSCd26dZObGVxQX+xLckaysrI4efKk3O86rVq1kgWJkpKSCAwMLPah46ZNmyhdujTDhg2jSpUq6Ovryw0Kzp8/n/j4eNzd3QsdNyoqihEjRmBpacmsWbMUfucrW7YsJiYmSKVS5s+fz9y5c1m0aBHLly8nJCSEli1bkpycjL+/vywYVjAdVZEHDx4An7poNmzYkEOHDuHp6cmxY8eKdC0vbj9paWmyh3PyBAQEcOXKFVkwDD5N9S+YOioI/wsREBMEQfhBmJiYKJy2VKZMmWK7++Tn5xfb/SgyMrLQFxTh+5CVlVXsdA5zc3P27dvHx48f8fLyKjYoWtyXVE1NTWbOnFnk5uLevXsYGRnx7t07li5dyr1793j27Bljx47Fw8Oj2C+wOjo6qKmpER4eTqtWrQoVUC4IiEmlUtkNQ2JiIubm5sydO5e5c+diZWXFhQsXiux3/fr1VK5cmStXrnDo0CHZ1BRF/u1Tkr42dXV1TE1NGTFiBOvWrcPf35/k5GTevXuHn58fbm5uWFlZYWxsTG5uLo8fP8bDw4MxY8bQuHFjSpQogZ6eHr169WLFihVcvnyZxMTEb31ZgqCQkpISnTp1wtfXl9evX7N8+XIePnxIp06dqF27NqtXry6UiVSyZEkmTZpEaGgoJ06cQFtbG2trawwMDFixYgU1a9bk0KFDhISE0LNnT3bv3k1ERATW1tay5iSdO3emUaNGREREYGtry4oVKzAwMGDVqlX/ioYgubm5xQZGivvc+qObN2+yd+9ewsLCSE1NxdjYmHfv3hVbIgI+ZRSPGjUKAwMDlJSUyMzMRF1dnXr16vHy5UvevHnz2WOnpaURFBTEmDFjkEgkvH37ltq1a2NlZVWok+O1a9eQSqUKA2JRUVGkpKQwadIkateujbe3N66urjx79qzQevfu3cPNzY1FixZRs2ZN2fLMzEwGDRpEmTJl8PHxISwsjHnz5hEbG1so28rQ0JBRo0ahoqLC7NmzWbJkCStXrmT+/Pn4+/vLHmDcunWrUA2ygq7EiiQmJjJlyhQ6duxIjRo1ePz4MWPHjlU4NfR/pago/+fGBKE4IiAmCILwg1BTU2P48OFFplZUqFABa2vrYgthf+4LjFQq/UtfcoR/RmZmZrE/lypVqjBs2LAvyuT73
PSVEiVKFPr/X3/9lRYtWpCTk8OBAwfYt28foaGhjBkzBk9PT4DPZgPt2LEDgMWLFxdaXtBtTSqVoqWlRVJSEp6enuTl5aGqqoqysjJGRkYEBATw8uXLItfx66+/Ap8K7desWbPIuReoWLHi/9R5VSiqbNmydOjQAUdHR/bv38+zZ8/IzMwkIiKCw4cPM2fOHDp06EC5cuWIjY3l9OnTzJkzh86dO1OuXDlKlSpF06ZNmThxIgcOHCA8PFzu1ExB+JbKly/PtGnTCAkJ4erVqzRr1oz58+ejp6fH4MGDuXDhguz3VklJid69e3Px4kUeP35Mjx49WLRoEdWqVcPBwYG8vDx27NhBVFQUtra2HDt2jIcPH9K/f39KlSrF0KFD6dSpE40bN+bZs2dYWVkxb948atasiaurK5mZmd/41VCsoGuhIrm5uV+0n4LC+vJcv36dpKSkYrevXr06o0aNYt68eVy/fp2AgABZLdXAwMDPHj8tLQ0NDQ2qVavG27dvuXHjBlZWVtSuXbvQ56qfnx81atRQ+Hni4eFBu3btUFVVlS1LTU3l4MGDsgdAOTk52NnZYWpqypQpUwpt7+joyNOnT/ntt9/Izc2lV69eVKxYkVWrVjF9+nTs7e2ZOnUqw4cPR1tbG2dnZ1avXo2rqyuzZs3i0KFDdO3alcaNG3P9+vUiTRuK614On7L9tm7dyvr167l8+bLC60xMTJRlk8mjqalZJKOsQF5eXrEPRxRNfxWEzxEBMUEQhB9IxYoVmTRpEiNHjsTS0hIbGxscHBwoW7ZssdspKysXGxgoKJIufF+0tbWL7RxaXJv330tJSeHGjRsKx42MjAoFzJYtW8aQIUNQV1fn5s2bLF68mKdPnzJ69Gh27twpW09fX1/hPlVUVDh06BC6urp06NCh0Njvn5qXLFmSa9euyZ16I5FI5N4stW7dmvHjx5OSkiJrW1+qVKlC61SqVAkrKysx7fcfpKSkRK1atejfvz/Lli3Dz8+Pt2/fkpqayr1799ixYwejRo2STYV98OABW7dulWWYqaurY2BggKWlJevXr+fOnTuyQv+C8C1JJBLatWvHnj17iI2NZc2aNTx9+pRu3bphaGjI8uXLC2XdNmjQgB07dvDy5UvmzJnDsWPHqF+/Pj169ODJkyesXr2amJgY5s6dy/Xr17l16xbdu3enYsWKjBo1ShYYCwoKonfv3kyfPh1DQ0Pc3d2/y2nIampqxXbKNDAw+KL9PHv2rNjA2qNHj774nNq3b8/Vq1fR19dHR0fniwJi6enpss++2NhYqlSpInc9Pz8/hdlhqampCgOA+fn5ssyn9evX8+TJEzw8PAp93yqoY7dlyxbq1q1Lv379+PjxIydPniQnJ4fLly9z/vx5Ll68SFRUFA4ODmzcuBF3d3ecnJzYuHEjQ4YMYcCAAZw5c6ZQNnaBkJAQoqOjFZ7j69evCQgIYMqUKXKnoebl5bF161aMjY3x8vJS2GSpS5cuCr9Lfq58QcmSJYsdFwRFREBMEAThB6OkpETNmjVp3LgxNWrU+OIb/k6dOsnNIiuYLiJ8f5SUlGjZsqXC8VatWn12H+vXr6d8+fLs3buXt2/fFhnX0dGRtZUHsLGxwcXFhfLly/Ps2TNsbW0JDg5m1KhReHl5Fdr22rVrpKamyj1uqVKlSE5Olk1F+b2oqCjZf2traxc79SYmJoasrKwiy11dXalYsSJ79+7l9evXODo6MmzYMHr16oWNjQ329vaFumQKX4+mpiZNmzbF1tYWHx8fnjx5QlpaGm/evOH8+fMsXLiQrl27UrFiRWJiYjhx4gTTpk2jRYsWaGlpUbZsWZo3b46joyPHjh0jLi5OZLAK30yZMmVwcnIiKCiIGzdu0K5dO5YuXUq1atXo378/Z86ckZUcKF++PC4uLkRHR7N7924SEhKwsLCgfv36HDx4kClTphAdHY2rq6ssC61t27ZUq1YNOzs7unTpgpmZGYGBgXTo0IGJE
ydSu3ZtvLy8vjjr6mvp2LGj3O8f2dnZn81IKpCTk1Ps+JUrV774fNq1a0d8fDxhYWE0atSIkJAQjh49yrZt29izZw9Pnjwp8j6SlpYme+ikKCCWkJDAkydPFAbE9uzZU+yDq7i4OCIiIli4cCFTpkyhSZMmsrFHjx4xYcIEbG1tGT16NLa2tty/f59jx46RlZWFu7s79+7d48WLFzx+/Jhdu3YREhKCl5cXdnZ2zJw5EycnJ6ZPn86ePXtQV1cvdGypVMq6deto27YtkZGRRUoI5OTkIJFIOH/+vMLmM9euXaNJkyZMnjyZ/v378/TpU6ZPn06zZs1kNUCrVKnC0KFDZYX+f+/du3fMmzePGjVqcPv2bYWvU6NGjRSOCUJxRJdJQRAE4Yu9fPmSixcvEhMTA0C1atXo3Lkz1atX/8ZnJigilUrx8/Pj5s2bspuuEiVK0KNHDxo2bKhwu8ePH9O3b19evHiBtrY2np6eDBw4kKdPnxISEkJubi7Vq1fHzMwMDQ0N8vLyaNu2raz2yJ07d2jZsiVPnjxh5MiRRTprhYWFUbduXWrWrMncuXNlT5/z8vLo0qULkyZNIjg4mKSkpCKBqTp16siyxPr27UurVq3kZogVmD17dpEv+gB37tyhRYsWVK5cmZcvXxY7bVj4PuXm5hIREcH9+/fx8/MjICCAyMjIIgXG1dXV0dPTo2HDhrRr144uXbpQp04dkdkqfBPv379n7969bN++ncePH6Ovr4+trS1jx45FT09Ptp5UKuXGjRu4urpy5MgRdHR0sLe3Z9KkSVSqVIkDBw6wcuVKgoODady4MaVKleLatWtUrFiRGTNm0LZtW1atWsVvv/2GkZERixYtYujQoQozdL62iIgILl26JKvXpaenx8GDB7l//z5Xr17FxMSk2O1fv34tm1ovj6+vL1evXv2iWpAfP35EV1eXLVu2EB4ejqamZpHXqVGjRlhaWsoCedOmTePUqVOEhoZSrVo1Ro8eLeuGXODQoUMMHjyYly9fFvrZwqefb6NGjejXr5/Cn0m1atXYt28fz58/JygoSBY8e//+PU2bNqVUqVLcuHGDNWvWsGDBAnx9fenfvz8bNmxQOG123LhxzJgxA19fXzZs2ICTk1ORdZKSkhg9ejQnTpxg5syZLF26FCUlJdavX8+pU6coWbIks2fPpk2bNnKP8erVK2bOnMnp06cZOnQoVatWRSqVUqlSJTp06CCrWyuVSuUGRmNjY1m3bh3u7u4AjB8/HkdHR/z8/GTfQQsYGxszZMgQ8Rku/E9EQEwQBEH40woybuQFGYTvU3p6Oq9evUJZWRl9fX2ys7O5cuUKT58+JScnB319fdq1a0fFihUZOXIkv/76KxKJhNGjR7Nt2zZZbZOcnBwCAwMJCwtDKpVSs2ZNDA0NadasGdHR0fTq1YujR4/SpEkTHj9+zIgRI9i9e3ehc5FKpejr6/Pu3TtOnDhBgwYNaNq0Kfn5+YSFhZGenk6FChUwNzfn7t27Ra6lbNmyZGVlkZaWxqhRo+jbty9PnjyRe935+fksWrRI4etia2vLzp07mT59OmvWrPkLr7DwPfn48SOPHz/mypUr3Lhxg+DgYOLi4gpllEgkEsqWLYuRkRHNmjWjW7dutG7dWu6UIUH4J0ilUu7du8f27dvZv38/mZmZ9OzZE3t7eywsLAoFbKOioti8eTM7duwgLS2NQYMG4ezsTLNmzTh9+jQrVqzg5s2bGBsbU6lSJW7cuIGuri5Tp06lTZs2rF69mpMnT1K/fn0WL15M//79v5sp4enp6SgpKaGhoUFycjIdO3bk7du3XL9+HUNDQ4XbSaVSfH19CQsLKzJWrVo1pkyZwpgxY3B1df2i82jevDmGhobUqVNHYY3CkSNHygraOzg4cO/ePe7evYu6ujqbN2/GwcGh0PqTJk3i/PnzcjOZr169SocOHRg6dCh169aVe7wyZcrg6OjIuXPn6Natm+y6+/fvz9WrV
3nw4AF37tzB2tqaJUuW4OLiwtOnT2W1MuWJjo5m37597Nmzh0GDBhUZv337NkOHDiU1NZVdu3bRq1cvXrx4wZgxY7hy5QrOzs4sX75cbv3NzMxM1q9fz7Jlyyhfvjzjxo2T23Rp0KBBhQr3F4iKimL16tXs3LmTEiVKMHnyZJydnSlXrhzw6TM9NDSU58+fI5FIMDY2xsjISOHvclZWFtHR0bLvHYpqhgo/LhEQEwRBEIQfTEZGBh4eHiQnJxcZ+/XXXwkODqZ27dqcOHECIyMj2VhWVhY+Pj5FOk4mJyfj6emJvb09q1evpkmTJjx69Ahra2v27t1b5BgTJkzg+fPntGrVSvYl9s2bN5QtW5alS5cyYcIE3N3dOX36NBYWFkW2V1VVpWLFirKpjvPnz2fHjh1Fnobn5eVx/fp1/Pz8FL4WOTk5VKlShUqVKjFx4kSysrLQ1dXF3NycunXrfjc3jMJfJ5VKef36NTdv3uTSpUs8ePCAyMjIIn8HBZ0uGzZsSNu2benZsyeGhoZ/+nchIyODkJAQUlNTqVixIkZGRt9NZo7w/UlJSWH//v1s376dgIAAqlatiq2tLba2toXqLX78+BFvb2/c3Nx4/vw5zZs3Z8qUKQwYMIA7d+6wcuVKTp06RdWqVTEwMODOnTuULFkSJycnWrZsyZo1a7h48SJmZmYsWbIECwuLf+R97u3bt0RERCCRSDAyMvpsR9/fi4+Pp127dmRmZuLv718ks+r3cnJyuHDhAg8fPiQ3NxeJRMK9e/cYMWIEERERLF68mLCwsC/KZJ8xYwZXr16lV69eCtcxMzOjT58+wKfgWHR0NAcPHqRy5cocO3YMS0vLQuvXq1ePNm3asH379iL7GjBgAMePH8fe3h49Pb0iU0Br1qzJlClTsLCwKPRgafXq1cyaNYvjx49TtmxZOnXqxJAhQ/Dx8UEikRAQEMCJEycUXsPDhw+ZOnUq7dq1K7RcKpWyfv16Zs+ejbm5OQcOHEBPTw93d3cuXbqEjo4Obdq0YdCgQUW6lkulUk6cOMGUKVOIiYnBycmJzp07y32oBVC6dGkcHR1lv3uhoaGsWLGCvXv3ygK5NjY2PHz4kNDQUPLy8qhRowbt27cvtiP27929e5eLFy/KXlcVFRU6dOhQ6LuHIIiAmCAIgiD8YC5fvsz169fljiUmJlK/fn3Gjx9fZOzChQvcvHlT7nYlSpRg2rRpNG3alMDAQIYNG8a+ffuKrHfnzh3mz58vt36Zuro6Dg4OshuX9+/fF/nS+vHjR0qVKkXDhg15/Pgx8+fPZ9GiRSQkJHDp0iVZ98EaNWrg6upKUFAQ6enphbp3/ZG3t7fcgsHt2rVTWPdF+O/Izs4mKCiI8+fP4+/vz9OnT4mNjS1Ue05JSYly5cphZGSEubk53bt3p0OHDrIaOH8UGhrKkSNHChU0L1euHNbW1l9cH0n4cT148AAPDw/27t1LWloaPXr0wN7enl69esney/Ly8jh9+jSurq5cvnwZPT09Jk+ejJ2dHa9evWLVqlX4+vpSunRpateuTUBAAGpqakyePJnmzZuzdu1a/P39admyJUuXLv3baoHm5+dz8uRJHj58WGi5ubn5nwq+vXz5krZt26KhocGVK1cUdh8skJOTQ2pqKlpaWgwcOJBHjx5x7949TE1NsbCwKFLDUp6TJ08yffp0hg0bpnCdBg0aMGDAAAAGDhxIWloay5cvp0mTJty7d4+mTZvK1n3z5g2VK1dm3759RfYZExODgYEB+fn5BAcHM3ToUJo0aSL7GdetW5dFixZx/vx5QkJCZAHFK1eu0LlzZ2bNmsW4ceNo0aIFderU4cKFC7Ks/bdv38qmGsrTrFmzIg+b5E2RTEpKYsGCBUUa8KirqzNixAhZoDI0NBRnZ2dZFpubmxt16tRhx44dvH79WuF5TJ48mZiYGJYvX85vv/1G5cqVmTFjBnZ2dsCnTtPv378vtI2KigqjR48utikDfGoEc
PDgQblj/fr1w9TUtNjthR+HeEwlCIIgCD8YedNLCpQtW1buFApA4bRE+BRUaNasGYGBgVhZWckNhuXm5tKvXz9ZW/s/ysrK4sCBA6SkpDBixAi5N04F2WllypQp9O/y5ctjZWXF3LlzcXFxwcbGhvr165Obm8v9+/cVnndCQoLC7lnXrl0jKSlJ4bbCf4OamhpmZmbMnj2bkydPEhkZSWZmJomJiRw6dAgHBweaNGlCXl4eN2/exNXVFQsLC0qUKIGWlhZ16tRh8ODBbNq0iaioKJKTkzl06FCR7n7v3r3j4MGDosC/8FlNmjTB3d2duLg4PDw8SExMpH///ujr6zN37lyioqJQVlamT58+XLp0iUePHtGtWzcWLFiAnp4eW7duxcXFhfDwcKysrHj48CEqKirUqVMHV1dXrK2tMTc3Z9++feTm5tK5c2c6duyIv7//Xz7327dvFwmGAdy7d6/Y9+I/KlWqFIsWLWLw4MFs27aN7du3ExkZqXB9VVVVdHV1UVNTw83Njfj4eDZu3IiLiwu7du0iODj4s8ds06ZNsQEcKNwduaCofmxsLECRovoFRf3/2CkZYMuWLQD06NGDhw8fEhQUxKRJkxg8eDD9+vUjMjKS/fv3s2HDBlkwLC4uDisrKzp06MDUqVPp3bs3Ojo6HDlypFAJC4lEQkREhNzz19HRoWvXroWW3b59m8aNG3Pjxg1OnjzJqlWrOH78OF27dpXbjTorK4ujR4/y4cMHZsyYQYMGDQgPD+fYsWOcPXuWOnXqFPsaFhg3bhxmZmYEBATg7u5OZGQkzs7OaGlpcePGjSLBMPj0PeL8+fOf3beih3efGxN+PCIgJgiCIAg/mM/dkCuqnVJc4fq8vDyCg4MZMmQI+/fvl7vO0KFDKVGiRLGFzIODg5FIJCxevFju+MuXL4H/C4SVLVu20LiSkpKssO6oUaMAOHDggMLjhYaGKhz7knHhv6tMmTIMHDiQrVu3cvfuXd69e0dubi5Pnjxh1apV9OnTh0qVKhEdHc2hQ4dwdHSkZs2aDBkyRG7NHPiUMfLq1auvfCXCv1XJkiWxtbXlzp07BAYGMnDgQDZv3kzNmjXp1q2bLPDasGFDPD09iYmJYfbs2Rw5coR69eoxadIkLC0tiYqKwsnJibCwMHJzczE2NsbDw4MxY8Zgbm6Op6cnycnJtG3bFgsLiz8VuPqje/fu/U9jv5eWlsbOnTt58eKF7PMiLi6OPXv2FPtAp0DNmjWZNWsWa9eupWPHjujr6+Pi4vLZ7UqXLo2hoSEfPnyQO16mTJlCzWjS09PR0tLi9evXKCkpyaby5efnc+vWLYKDg5k7dy4nT54s1KUyIyODrVu3kp+fz7Rp01i0aBF9+vTB3NwcgNTUVBwcHOjatSsjRowAPmXAFRSO37VrF8OGDSMuLo6TJ08W+hyMjY3FyMiIAwcOFMmM1tfXZ8yYMbLXtGCKZNu2balatSqBgYG0bt2akSNHMmjQINq3b6/wtUpMTKRt27Zs2bKFhQsXEhwcLGs4kJ2djbu7O4cPH1a4/du3bwkLC2P37t08e/YMe3v7QkG94n7OMTExChsG/H7/isTHxyv8niP8eERATBAEQRB+MAUFgeXR0dEpEmQqIK+lfIF3795haWmpMPh06tQpDh8+TLVq1Yo9t3fv3tGwYUOF5xAUFESzZs2oXbs2jo6OJCcnKwwwtG/fHiUlJc6ePavweH+s2fJnx4Ufi5KSEiYmJsycOZPjx4/z/PlzMjIy+PjxI4cPH2bChAnF/n3Bp8yQHTt2cP36deLj40XGmPBFTE1N2bx5M3FxcXh7e5OWlsbgwYPR09Nj1qxZhIeHU6FCBebPn090dDQ+Pj68ffuWHj160KlTJ6pXr05oaCjLli3jzZs3pKWlYWhoyJ49e3BwcKBp06Zs2rSJFy9eYG5uTr9+/Xj8+PGfOkepVCo3q6eAvLqV8ty6datIp9iC/Z8/f/6L/mZmz55NlSpVm
D59OosXL+bo0aPcuXPns9u1b9+eY8eOoa6uLgu6SKVSjI2NsbGxQU1NTbZuWloampqaxMbGUqlSJZSVlZFKpRw5coTz58+joqKCqqoqsbGxHD58WFaqwNfXl5SUFOrWrcurV68IDw8v1Pxl3rx5JCQk4O7uLsuU/vnnn7l9+zb79+9n0aJFXLlyhcOHD8u6NQK8ePGC2rVrk5KSwubNm9HS0mLNmjXExMQwYcIExowZI2sakpSURL9+/Zg2bRpTpkzh6tWrhIaG0qBBA44fP86uXbto27Ztsa9Vs2bNCA0NZe7cubJu07t376Zu3bpMnDiRsmXLFukSDZ8eoLVo0YInT54wYsSIIg/JpFJpsQ/gCtaRJyYmhpUrVxb7uyavg6jw4xK/CYIgCILwg2nVqhUlS5aUO9alSxeFXxQVtVeHT52lFNXrSE9PZ8iQIaipqXHo0CFZdpc8QUFBzJ8/X+5Yfn4+b9++pWfPnqirq1OmTBmSkpLYuXMnISEhRdZXV1fHyMiIiIgIhRk7nyu0XKNGjWLHBQE+ZfL079+fLVu2KJxyXODKlSvY29vLurqWKVOGFi1aYGNjw4oVKzh8+DDBwcGFapgJQgFNTU1sbGy4ceMGQUFBWFtb4+HhgbGxMZ06dcLX1xf4lCH74MEDrl69Sp06dXBwcKB+/fokJydz/fp1tm3bRlZWFikpKdSoUYPffvsNJycnmjRpwqpVqwgKCsLU1BQrK6svzpSVSCRyAyAFvrSD6/PnzxWOJSYmftFU9hIlSuDq6srZs2fR0tKiQYMGzJ49+7PBtHbt2hEZGUmtWrVYu3YtmZmZuLm5MXToUEqVKlVo3d9PmSx4YBQTE0NQUJDcfV+9epWPHz/KOhpPnz6dJUuW0L9/fxo3bgx8yqLbuHEjixcvlgXXDx06RGBgIPPmzcPPz48SJUqwbt26QlMxIyIiqF+/PmlpaXh5eREeHo6zszMTJ07Ew8ODChUqyNa9c+cOZmZm+Pv7c+LECRYsWMCUKVPo1q0btWvXJigoiJEjR372NV6/fj3VqlWTBQEbNmzIqFGjaNCgAY8ePcLb25ty5crx7NkzkpOTyc7ORldXl/HjxzNq1Kgi3zXS09Px9PSkadOmsumm8lSuXLlQt8ikpCS2b99O+/btqV69OosXLy42oNaoUaPPXpvw4xABMUEQBEH4wWhrazN27Fjq1q0r+0JasWJFhg4diomJicLtjIyM6Nu3b6EvopmZmcTGxuLh4aFwOwsLC9LT09m+fTvly5enb9++covcP3/+nBcvXtC/f3+5+wkODi40paKAVCrl1KlTcoNeFhYWstpP8tSsWVNh0KtmzZqF6sUIwpcwMzNTGFQuV64cN2/eJD09nSdPnnDo0CFmzJhB7dq1efbsGatXr2bgwIGYmJigqalJrVq16NmzJ1OmTMHd3R0/Pz9iY2NFVpkAQP369XF1deX169fs2bOHvLw8hg0bRtWqVZk2bRrPnj2jXbt2HD58mIiICGxsbNi8eTN169bFz8+PXbt2cfDgQUqWLMn79++pWrUqZ86cYdasWTRu3JgFCxZw8+ZN6tevj42NTbE1vAr8vqj8H50+fZobN258dh+fK7w/ZcoUhQ85fs/S0hILCwumTp3K/PnzuXLlChcuXCh2m4KsqOTkZHJzc8nJyeH9+/eyOmG/l56eLssQKwiIFTfVLz8/n7NnzxISEoKuri45OTlERkaycOFC4FNG8rhx42jUqBHOzs4APHv2jJMnT9K2bVvZ332ZMmVITk7m4sWLwKcHSQ0aNCAjI4Pdu3dz7tw51q1bh5ubG6tXr5a9H0mlUjZs2ECbNm2oUqUKDx8+pFy5cjRu3JidO3eyadMmzp8/T+nSpZk6dSq2trYKA/ONGjVCW1ub8+fP06xZMwYMGECVKlW4ffs2Bw8e5N69e9StW5cRI0aQmJjIgAEDWLZsGY6OjkUK4oeFhTFlyhSqVq2KnZ0dVapUwc7ODi0trSLHV
VJSomvXrmRmZnLo0CH69+9PpUqVmDBhAhoaGuzatYu3b9/i6upKrVq1imyvr69f7FRQ4ccjukwKgiAIwg8sNzeXvLw8uYGmAllZWQQHB/P+/XvKli1LrVq1GDJkCKGhoZiZmXHo0CHZuhkZGVy/fp2nT5+Sk5ODVCpl48aNGBoaFnrim5SUxO3bt4mNjUVDQwM1NTWsrKywsbFR2A3s4MGDcjPBCowaNQoDA4NCy4KDgzExMWHs2LF4enoqvL5z587x+PFj8vLyUFZWxtTUlO7duxeaHiMIX+rx48ccP3680E27jo4Ow4cPlxXIlkcqlRIfH8+zZ8+K/BMZGSnbn7a2NsbGxtSuXbvQP8bGxmhqav7j1yd8v0JDQ9mxYwfe3t6yOk/29vYMHDiQEiVKkJKSgre3N25ubkRGRtKyZUucnJzQ0dFhzZo1XL58mYoVK5Kbm0tiYiK9e/fGxMSE0NBQPnz4QN26dZk9e7bC6e/5+fmcOHGCwMDAQsuNjY3ZvHkzt2/fZubMmSxatEjh++uVK1e4evWq3DF1dXXmzp2Lra0t27Zt+2zwrCBzaubMmdy+fRs9PT2aNm2KmpoadevWpXnz5kUe0NSvX5/WrVtz4cIFOnXqxM6dO/Hz8ytSHL9s2bLMmDEDX19fWrVqxZYtWzh//jy3bt1SeD7R0dF4e3szf/58vL29adasmSy7euXKlbi4uHD37l3MzMxIS0ujb9++CqcuSiQSOnfuTKdOncjNzWXPnj14eHhw8+ZN9u7dy8CBA2XrJicnM3r0aI4fP86MGTNYsGABK1asYMWKFZibm7Nr1y6MjIw4evQojo6OJCYmYm1tTXBwMI0aNZJ1+ZRIJDRq1IjSpUszf/58rl69SsuWLVm2bBktWrTA09NTNk2zX79+zJkzR1Yb7fdyc3M5ceIEW7Zs4eLFi5QrV45x48Zhb28v+xxPSkri0qVLPHv2jLy8PPT19SldujQnT57k0KFDpKSk0LRpU0aMGMGAAQOIi4uTNZwwNjamXr16hIeHEx4ejlQqpVatWtSrV09MlxQKEQExQRAEQRAUevHiBQcOHChUwDYrKwsfHx/Mzc05evSobHlmZiY7d+4kISGh0D4Kshbq1q2r8Djt2rXj+vXrxMXFyb54/9HevXsVds4CsLa2xsjIqNCyqKgoFi9eTLVq1dDS0qJu3bp07NgRbW3tIttnZWWRmpqKtra2CIQJf1lKSgpBQUGkpqZSsWJF6tWrJzcz8ktlZ2cTGRkpN1j27t072Xr6+vpFAmW1a9dGT09P3Aj+QLKysjhy5AgeHh5cvnwZXV1dRo4ciZ2dHSYmJuTl5XHy5ElcXV25cuUK1apVY/LkyZiZmbF161aOHDlCxYoVGTBgQKHpdvCpA2/9+vX5+eef5XYhhE8NJAqmPhoZGVGhQgXy8vJYvXo1CxYsoF69euzZs0duVnJmZiY7duwgMTGx0PLc3FxCQkLo1q0b48ePZ+rUqaxdu/azQTEXFxf27NmDra1tkWLqenp6jBo1qtDf5sSJE7l8+TL16tXjw4cP+Pn54eHhga2tbaFtS5QowerVq1myZAmOjo64uLjw/Plz9uzZI/c8JBIJGzZsIC0tjRUrVjBjxgyePHlC/fr1iYiIoEGDBkyePJk1a9YglUoZNWoUycnJcgNKBY4fP86jR4/w8fFh1apVvHz5kuPHjxcKot25c4ehQ4fKgqFKSkpcvnyZkiVLoqysTLt27ahevTrOzs6cOHGCDh06oKamxvnz52nbti1ubm5UrlyZtLQ04uPjWbp0KadPn6Zhw4YsW7aMdu3a4e7uzvr160lISMDa2pq+ffuSmpqKRCLByMhIFoR88+YNHh4ebNu2jdevX9OyZUsmTZrEoEGDFGaABwYGsnfvXvbv309sbCw1a9ZkxIgRWFtbU7t2bdLS0vDx8Sny3cPAwABra+tiG/kIggiICYIgCIIgV0ZGBhs3bpTbzSknJ4cFCxYUuom4evWqw
rof5cqVY+LEiXJvXLKystDU1KR27do8ffpU4fn4+/tz6dIluWPKyspMnTq1UHZMeHg4+/fvLzK9TEdHR+F0DEH4N0pMTJQbKIuIiJA1hihRooTcrLLatWvLDRAL/x3h4eGyrLH4+HhatmyJvb09Q4YMQVNTk8DAQNzc3Ni3bx8qKirY2NjQp08f/Pz8FL5PHjlyhLCwMCZPnszMmTMpV67cF59PYGAgI0aMIDw8nGXLljFlyhRZd+AC6enpXL16lZCQEHJycqhevTqamppYW1tjbm5O7969mT59OosWLVJYd7JAWloaM2fOLBLYK9CtWzdatmwp+/8DBw5gZWXFjBkz8PLyokSJEgwfPpwVK1bI1snLy0NFRQV3d3ccHBzw9PRk7NixSKVSdu7cKbfZS05ODsuXL8fGxoaLFy/Stm1b9u3bh1QqpXPnzrx48YKgoCA0NTVxd3dnwoQJrFu3jo8fPyq8tmPHjuHk5ISLiwsSiYSzZ89Sr1494FMwydXVlZkzZ9K0aVP27dvH/v375TaLCQ8P59KlS7Rt25YTJ05QtmxZ1q5dy9ChQ5FIJDx79oz58+dz8OBBjIyMWLJkCZ07d+aXX37Bzc2N1NRUbGxscHR0xM/Pr0hRey0tLQICAvj1119RU1Nj+PDhTJgwQVY77Y9evHjBvn372Lt3L0+fPqVcuXJYWVkxfPhwmjdvXui7xLFjx4pkJBbo3LlzsfVPpVIpsbGxxMfHU7JkSWrWrFnkd1H4bxPhUkEQBEEQ5AoKClLY2lxVVVXWkarAs2fPFO7r3bt3JCUlybpHZmZmcu/ePVndsBYtWmBvb1/s+ZiZmXHkyJEihY0BmjdvXigYJpVKOXfunNxaSx8+fODWrVt06dKl2OMJwr9F2bJladWqFa1atSq0PDc3lxcvXhQJlF27do03b97I1qtcuTJ16tQpEiirXr26uDn8DzAyMmLVqlUsWbKE48eP4+HhwZgxY3B2dmb48OHY29vj5eXFypUrcXd3Z8uWLfj4+DBjxgyF+2zZsiX16tVj69atbN26FWdnZ6ZNm1ZsUf0CjRo14v79+8ybN4+ZM2dy4sQJfHx8CtVz1NTUxMLCAgsLi0Lbnjt3DgsLC7Kzs5k/fz4LFixAW1ubKVOmKDxefn6+wmAYQEhISKGAWLt27YBPGV3v3r2jZcuWRQr9p6enA//XibigLpZEIkFJSYlr165hZmZGyZIl0dXVpUmTJlhYWCCVStHX1yc2NlYWyPPy8sLPz4/z58+jqanJvXv3cHJyYsKECdy/f79QJ8nfy8vLw9bWFmdnZ/T19Tl9+rTsPH4/RXL69OnY2toybtw4WrduLfdvuqABzbFjx5gxYwazZ8+mZMmSREdHs2jRInx8fKhatSo7duygR48ebNy4kXHjxpGXl4ednR3Tp0+nWrVqHDt2TG6Hx7S0NPLz81m3bh2jRo2S+3sSGxuLr68vwcHBhISEEB4eTrdu3VizZg1du3aVm2Gbl5ensIkBfJq+rigglpaWxq+//kp0dLRsWalSpRg0aNBnO2IL/x0iICYIgiAIglzFtS2XN/65IscF46mpqXh5eck6hUkkErp160ZqaqqsSLE82dnZeHl5YWdnJ1tHQ0ODFi1ayG5gfn9uf5xu83vh4eEiICb856moqGBoaIihoSG9evUqNPbhwwfCwsJ49uwZoaGhPHv2jBs3buDt7S0LhKurq2NoaCg3q0xXV/dbXJLwF6ipqTFo0CAGDRpEZGQknp6e7Ny5ky1btmBubo69vT3Tpk1j9uzZ7Nq1S24h+QISiYT9+/djamqKgYEB69atY/PmzUyfPh1HR8fPZh1qaGiwZs0aevfujY2NDQ0bNsTNzY3Ro0cXOwWydevWXLx4ke7du5OTk4OzszNTp05FW1ubcePGyd1GXkZUceOVK1fGyMhIFjTW1tYuMl2/ICBW8O+CovrwKVv60aNHvHz5kpCQEJSVlfHx8eHjx4+0a9eOHTt2MHz4c
OrUqcPbt2+ZPn06I0eOpGvXriQmJjJo0CAaN26Mqqoqvr6+2NjYyG3+oq6uztSpU2nTpg2HDx+WdfH8/RTJY8eO8ebNG5o2bUrz5s2LDXCbmZmxd+9eatWqxZs3b5gzZw7u7u7o6uqyYcMGevTowaZNm5g8eTKqqqpMnjyZKVOmyIKNUqmU4OBghftv3rw5jo6OhZZlZGRw4sQJjh49SrVq1dDU1ERfXx99fX169+7NoEGDMDY2VrjPqKgocnNzFY4X/HzkOXToUKFgGHya6r5v3z4mT54sssh/EKKQgCAIgiAIcn3uhveP438saP972trasik1ly5dkgXDfu/du3f4+fnJ3T4vLw9/f390dXXR1tbm/7F332FRnfn//58zzNB7b0oXBBs2sKDGBhrRmBhjN/aoSVw1MdnUTTEb48aYaEysibFrYjfYGyoKiqKCIB2lg0iHmWHm9wdyviKDyX72U5L87sd1cSFzzrlPmXFXXnnf73vx4sXMmzePxYsX079//xa/QP1WRwjRMUL4/zsrKyt69OjBpEmT+PTTT9mzZw83b96kurqazMxMjh49yhdffEH//v0pLy9ny5YtvPzyy/Tq1QtbW1scHR0JCwtj5syZ/Otf/+LQoUPcvXv3N8MH4Y/B29ubpUuXkpOTw759+3BwcGD27Nm4uLiwYMECOnbs+NTwpKlf071799i/fz+enp706tWLjz/+GG9vb/71r389NYxo0r9/f27evMmYMWOYPn06o0ePpqio6KnH9OzZk9OnT5ORkcHZs2eZPn06s2fPZteuXXr3t7a21ltZ3MTDw0PvdV27dg0bGxt0Oh3p6enN/n+jurq62femQEyn03HmzBnq6+sZNmwYBgYG6HQ6li5dCkCnTp0oLCzk/fffB2DBggUYGBiwYsUKtFotkyZNorq6msjISL755hv8/f3Zu3cvQUFBGBoaUldXR25uLiqVivfff58xY8YQFRWFlZWVNEUyLCwMFxcXjh49yvfff8+cOXNaTPnUZ+TIkdjY2PD3v/8dHx8ffvrpJ/7xj39w7NgxEhISCAoKYvv27bzzzjtkZ2fzz3/+UwrDVCoVO3fufOrf/6YVKxsaGjh58iTTpk3DycmJiRMnSlNiH6dWq/n555+bfY60Wi1Xrlzhvffeo3Pnzvj5+en990STx4PKxxUUFJCVlaV3W11dHQkJCa2OKfy1iAoxQRAEQRD06tChA6dOndK77Lq5uTkBAQHNXuvduze3bt3S+0vQM888g1wu/83/gnz79u0WlSxJSUn8+uuvVFdXM2XKFLRaLdeuXaNfv36tVhLY2tpibW3Nw4cP9W7Xtxy7IAggl8vx9PTE09OT8PDwZtuqqqpITU2Vpl4mJycTHx/Pzp07pWBAoVDg4+Ojt6rM3t7+NxugC/+7lEolzz33HM899xzZ2dls2rSJjRs3snbtWl5++WW9lUlNjc4BqZowLy+PO3fu4O3tjYeHBz/++COJiYnSyn69e/dudcEUS0tLNm3axKhRo6Sm/+vXr2fUqFGtXndwcDBnz55l0KBBqNVqxowZw6RJkzA1NSUyMrLZvnK5nGeeeYYDBw60GMfY2LjZdMkmTZVcffv2pbKykoqKCkpKSqRVYpv+f66qqgojIyNsbW2BxoqlnJwcAIYPHw7A5cuXSU1NxdPTk59//pnJkyfj5+fHkSNH2LVrF1u2bMHe3p6PP/6YY8eO8emnn/L+++/Trl07MjIyOHr0KNnZ2bz77rsYGxszYcIEPvvsM5YsWcI///lP5HI5ZWVlTJs2jQMHDvDGG2/QpUsXIiIiMDIy4siRI/j4+DB58mQiIiJaDTrj4uIYN26cVHk3fPhwVq9ezfvvv4+joyOff/45c+bMwdzcXDrm3r17fPfdd6xZs4by8nJmzZolTdvU9z4vXryYHTt2kJ+fj6+vL4sWLSIsLIwLFy7oPUatVnPt2jVKS0s5dOgQR44cobCwEFtbW4YPH867776Lu7s7J
06caHGsTCZrMY28ydNaPAAtGvQLf12iqb4gCIIgCK3KzMxk9+7dzXqJmZmZMX78eL3/6C0pKeHYsWPS9BJbW1v69+9Pp06dgMaeRk3/pVwfmUzG+++/L/3SnJ2dzebNm/VWdD3ZCPlJSUlJ7Nmzp8Xr5ubmzJ49WzQSF4T/JjqdjtzcXL2N/bOzs6W/vzY2Ni1CsoCAAHx8fPSuMCf839BoNBw9epT169djYmLSbIVghUKBUqnku+++IysrC3Nzc6qqqjA2Nqaurg5zc3P69u1LaGhoi3FfeOEFvatKPq6oqIhZs2Zx8OBBpk2bxsqVK59a3ZWcnMzAgQOxtLTE29ub06dP8+uvvzJw4MAW+yYkJHD69GkqKioAyMjIoLKykr1797bYNycnBw8PD0aOHMn169e5d+8eMTEx0n1duXKF0NBQpk+fzpkzZ8jIyABg48aNzJo1CyMjI8rKyjA2NubZZ5/l119/Zdy4cezZs4e7d+/i4OBAUFAQgYGBREVFceLECSIiIpg3bx4//fQTdnZ2ZGdns3v3boqLi5k3bx7m5uYMHDiQQ4cO8fXXX/Paa68BEBsby9ixY6moqGD16tUcOnSInTt3MnbsWFasWMHy5cv55ptv0Ol0DB48WG9PrczMTHbs2MHcuXMJDw/n+++/5+DBg3h4ePDWW28xbdo0jI2NgcYqrVOnTrF8+XJOnDiBTCZDp9MRHBzMnDlzmvUnbKLVavnxxx+pra1l3LhxTJo0iR49eiCTyTh79iznzp1r9T2OiYnh2LFjtG/fnsjISEaMGEGvXr2k1SN1Oh2XL1/m3Llz0n/AMzMzY9iwYQQFBaHT6cjKyuL8+fOcO3eOc+fO0dDQwLRp01o9Z1hYmN7PkPDXIwIxQRAEQRCeqq6ujsTERMrLy7G1tSUwMBBDQ8OnHlNfX49arcbMzKxFRcj3339PYWGh3uPc3d2bLW2/Y8cO7t69q3dfMzMzFi1ahFzeegeI1NRUzp49S15eHgYGBgQEBDB48ODf1fhZEIT/XG1tLWlpaS2CsuTkZCmYkMvleHl56a0qc3Z2FlVl/4fu37/Ppk2buHjxIgUFBSiVSqZNm8a4ceOIjo7mq6++4vz585iYmFBbW4uXlxdTp07VO1ZdXR0BAQFMmjTpqdMxdTodP/74I6+//jr29vZs3ry5RZ/Ix6WlpTFw4ECUSiXu7u5cu3aNkydP6g3ldDod27dvZ9asWTg6OpKdnU16ejre3t4t9vX09CQgIIBjx44BsGXLFiZNmgTA6dOnGTRoEM899xzFxcVShdOkSZM4cOAA/fv35/Dhw+Tn5+Pm5oaZmRkmJiaMGjWK9evXs2DBAjZs2MDt27cxMDCga9eudO7cmfT0dOrq6igsLOS7776jtraWRYsWYWlpSYcOHbh27Rrbtm3jhRdeQKfT8fXXX7NkyRK6du3KK6+8wjvvvENdXR1r1qzB2tqaSZMmUVpaikKhQKPR0L17d95++20KCwspLi6mqqqKq1ev4u3tzdChQ9mwYQMnT57E39+fv//970yYMEFqZl9WVsbGjRtZvXq11HvL2NiYKVOm8Oqrr0qL7MTExHDy5Em0Wq10XE1NDc8//zyDBw9GoVBQWFjIwYMH2bdvH+np6UyYMKHV99fExIQXXnjhNyu7VSoVeXl5yGQyampquHDhghSC3bt3D5lMRseOHenfvz/9+vUjNzdXbxW5XC5n/vz5UtWf8NcmAjFBEARBEP5XtVa5BTB+/PhmDXS//PJLqqqqWh3rb3/7m9RI+Gk0Gg1yufyp4ZkgCP97dDodhYWFeqvKMjMzpUU4LC0tadeuXYugzM/Pr9UFOIT/fg0NDZw4cYJ169Zx8OBBDA0NGTt2LLNmzcLExIRvvvmG7du3M3jwYEJCQlodZ9u2bRgYGPDRRx8xZsyYp/5vcmZmJtOnT6e0tJTIyEiGDBlCly5d9P4HjaysLAYOHEhDQwOOjo6kpaVx9uxZOnfu3GLfpkqpt
LQ0cnJyCAkJ4fLlyy32mzp1KnFxcdy5cwdra2v+9re/8eGHHwJw+PBhIiMjCQsLw9nZmd27d6PT6XBzc6OgoICVK1cyfPhw1q1bx/Llyxk0aBDnz5/n7t27FBYW0qtXL5YvX86rr75Kv379yM/Px8nJibt371JRUcGHH36IoaEh7777LtbW1ri4uFBQUMDBgwfp27cvZWVlTJ8+nf379/Paa69RW1vLhg0bCA8PZ9myZSxevJhTp04BjVV91tbWLFu2jClTprBnzx4++OAD0tLSGDduHAMHDuTHH3/k0qVLdO7cmXfffZfnn39eCi2vXbvG119/jVarxcfHR5qi6ePjw/Tp0zEzM6OmpoaDBw+ybds2jh49CjT2JIuMjGTMmDGYm5uTmZnJvn372LdvHxcvXgQgMDAQc3NzevbsKa1C/Thzc3Nee+21Zv8RTqfTodFoUCgUyGQytFotd+7c4dy5c1IAVlBQgFwup2vXrvTr14/+/fvTt2/fZiFXYWEhW7dubfZvDLlcTmRkJF26dGn1cyn8tYhATBAEQRCE/3Xx8fGcOnVK6sNiZmbGkCFDWvzysmbNmqf28njrrbekaRyCIPw1qFQq0tPT9YZlj68e6+HhobeqzN3dXVSV/Q46nY7r168TGxtLaWkp1tbWdO/enR49ejw1qMrPz+fHH39k/fr1ZGZmEhgYyOzZsxk6dCi7du166rM/cuSItMJpp06d+OSTT4iMjNR7TFlZGVu2bGm2orFMJmPEiBF07dq1xf737t1j0KBB1NTUYG1tTXFxMefPn8ff37/FvklJSXTu3BlXV1dycnK4evUq3bp1a7bPpk2bmDlzJkqlEjc3N/r06cOWLVsA2LVrF+PGjcPf35+IiAhWrlzJ3bt3CQsLY/To0bi4uACNUwVv377NhQsXeP755/nmm2/o1q0bhoaGXL58mb/97W+sW7eOAQMGcO7cOTQaDbNnz8bW1pbPPvsMW1tbjI2NUSgUHD16lPbt2xMbG8tLL71EeXk5S5YsYf369RQUFPCvf/2LyspK3nvvPdRqNUqlEq1Wy+uvv877779PdHQ07733Hrdu3eLZZ5+lf//+7Nixg+vXr9OrVy/effddhg8fjkwmo7a2lp07d7Js2TLy8/OZM2eO3lUX/f39iYqKYu/evVRVVRESEsKkSZMYO3YsDg4O3L59WwrBbty4gaGhIe3atUMmk3H37l3q6+ulqZAODg5SL0JoXAzhpZdeknrPqVQqTp8+zY0bN6ivr0cul5OTk8P+/fspKSlBoVDQo0cPqQKsT58+T51uC43V7Ldu3aKwsBALCws6deokBa46nY76+noUCoU0PVP46xGBmCAIgiAI/yc0Gg35+flA4zL3+v7BGR0dzenTp/Ue7+/vz7hx4/5Hr1EQhD+WkpISvUFZenq6tMKdqamp3qoyf3//Zg3B///u+PHjxMTEtHg9ODiYkSNH/ubxWq2W06dPs27dOvbv349cLmf69Ok4OTnp3b9pit/Dhw8xNTXF0dGRrKwsevTowSeffMLQoUOlYEyn07Fx40Zyc3P1jtW0IuaT8vPzGTRoEKWlpVhYWFBfX8+FCxf0riT5xhtv8O2331JXV0f79u1JSkpqtj0tLQ0/Pz9pqp6joyOXLl0C4IcffmD69OlYWlry3nvv8eabb/Ldd9+RkZGh9zOWmJjIl19+yZYtW/jggw+kyrOJEycyfPhwfv31VwwNDYmMjKRNmzasXLkSOzs7NBoNHh4eREVF4eLiwjfffMOqVasYNmwYdnZ21NTUUFpayvDhw3njjTfIyspCLpej1WoZNGgQ33zzDQUFBbzzzjtcuXJFCov27NlDcnIygwYN4t1332XAgAHIZDLS09P56quv2LRpE7W1tQCMHTuWwMBAve9DSUkJx48fZ+LEiUyYMAFvb2+uXLnC3r17pemQpqameHp6Ul1dTXZ2NgqFgn79+kn9wHx9faX3PCsri9LSUqysrKRqNI1Gw7Vr1
zh+/Lg0DfNxWq2Wfv36ERoaqje0q6mpITExkaqqKpycnPD393/qlF1oXODn3LlzlJSUIJfLCQgIYOjQob+rIl34cxGBmCAIgiAIf1hqtZpt27ZJ/UqaWFlZ8fLLL4teYIIgAI0Be2Zmpt6w7PGeha6urlIz/8eDsrZt2/7mL8l/JQ8ePGDVqlWtbm8tcGpNUVERmzdvZuPGjQwZMgR7e/sW+8THx3Pw4MFmrxkaGuLg4EBubi59+/bl008/pX///hQWFvL999+3er6mVRf19ZUqKipi8ODB5OXlYWJigpGREdHR0S3up7KyEn9/f3Q6HQUFBRw7doyhQ4dK23U6He7u7tjZ2VFSUoJGo6GoqAiA1atXs2jRItRqNVu3bmXixInMmzev1TAQIDIykt69e/P6668zdepUevbsSZcuXbh06RImJiaEhITQrl071q1bh729PRUVFYSFhfHLL7+g1WqZPn06SUlJjB8/vkVFXXFxMRs2bEClUkmBmrOzM++99x6nT5+me/fu9O7dmwMHDpCdnU1kZCTvvPMOoaGhNDQ08Ouvv/Lpp58SGxsLgIGBAZGRkbz55pvcuHHjqZXa8+fPJz4+nn379nHgwAHy8/OxsrLCycmJ4uJiysrKpFUhR4wYQXh4+FP/v1ulUhEXFyc1wL906RJt2rThpZde0ru/XC5n4cKFeoPI5ORk9u7dK4Xl0LjYz8SJE1vtEXb9+vUWn1No/HfH7NmzxVTtvxgRiAmCIAiC8IfW0NDAzZs3SU5OpqGhAQ8PD7p3746Jicn/9aUJgvAn8PDhQ+7evdusoX9KSgqpqanSqnRGRkb4+fnprSr7KwbvsbGxREVFtbp9wIAB9O/f/98eV6fTcfz4cU6ePCktqqLVarGxsSEjI4P169c3W7W4iYGBAXZ2dhQVFTFo0CBef/11rl+/3up5mlZgXLFiBbNmzWoREJWWljJ06FAyMjJQKpU4OTlx9uzZFn2qtm/fzsSJE5HJZLi7u5OTk9Ns+/jx44mJiSE3NxeNRkN5eTmWlpYsW7aMzz77jIqKCk6fPs2AAQOYMGECAQEBrV7z7du3iY+P59KlS/Tv3x+1Wk1OTg6Ghob4+fkRFBTE9u3bsbe3p6SkhIkTJ7Jp0yZu3LjBSy+9xMOHD5k3b16ri9qcPXuWgQMHEh4eztKlSzl48CCBgYGEhIQQFRVFYWEhL774Iu+88w6dO3emuLiY1atXs2rVKmlaqpOTEwsXLmTmzJk0NDSwYsUKqqqqcHBwaPW+vv/+ewoKCrC1tcXS0pK8vDxUKhUBAQFERkYSGRnZbFXIJ9XW1nLlyhUpAIuJiaGurg4LCwv69u1L//79sbe35/79+61ew5gxYwgKCmr2Wnl5OatWrZL6ET7O2dmZ2bNnt/jcNDQ0sHLlylZ7l/5X/14If1xiMqwgCIIgCH9oBgYGBAcHExwc/H99KYIg/AlZW1vTs2dPevbs2ez1hoYGcnJyWlSUbd68udlUPUdHx2YBWVN1mZeX15+2t9Bv1UQcPHiQ0tJSBg0a9G9NE5PJZISHhxMeHk5OTg67d+9m8+bN3L59Gx8fH95++21kMhlr164lLy9POq6hoYGioiJkMhnXr19n2rRpvP76662eJyIiAoVCwZw5czh48CAbNmyQek0B2NnZcerUKSIiIkhKSuL+/ftERERw6tSpZn2lxo8fz7p167h+/Tr37t1j27ZtTJw4Udrev39/9uzZI4Uq6enpBAcHU1NTg5GREdBYdZicnKx3xcLHxcXFsW7dOl577TXu37+PgYEBhoaGODo64uHhwfbt27G1taWkpIS33nqLpUuXsnr1at58803at2+Pm5vbU1d4HjlyJHFxcXz44Yd4enoyZswYTp8+zZYtW5g8eTJvvfUW7dq149KlS0RERHDixAm0Wi0ymYxBgwbxzjvvEBISwvr16+ndu7e0wvPAgQNbDcTy8/Olz1JFRQVdunRhwYIFzaZCPqmqqopLly5JD
fBjY2NRqVTY2NgQFhbG0qVL6d69O/X19cTFxXHhwgUMDQ3p1KlTq/eurwfdjRs39IZhAAUFBeTm5uLu7i69ptPpuHv37lMX8snMzBSB2F+MqBATBEEQBEEQBEF4TFVVVbOqsqbKsrt370qLgSiVSnx8fPRWlembMvhHUlJSwrffftvq9mPHjhETE4OBgQGhoaFSyNWtW7d/e2qpTqfj4sWLrFu3jj179qDRaBg5ciQdOnTgxIkTevuYQeMqj15eXi1e12g0bNy4kYULF+Ln58crr7yCRqNh7dq1vPDCC832raysZPjw4cTHxyOTyejWrRtRUVHNpr3dvn1bWlWwqRl/U8By586dZv2z9uzZw5gxY3jjjTfYtm0bBQUFVFRUsGXLFpYuXcrs2bP13kt5eTmlpaUEBwezcOFCnJycKC8vlxq5nzp1Cmtra8rLy/nmm2+YOHGitIrkwIEDuXTpEu3bt2fUqFGtPuecnByioqLo3LkzFy9eRKVSMWPGDJYsWYKdnR2bNm3iiy++kMJeGxsb5s+fz5w5c7hw4QIrVqzg2rVraLVa5HI53bp14+WXX0atVpObm9uiP5dKpWLv3r1069aNyMhIvVMhdTodqampxMfHEx8fz7lz57h27RoNDQ04ODjQv39/wsLC8PDwoKSkhNjYWC5fvszt27fRarVYWVkREhJCaGhoqws9KBQKFi9ejLGxMSqVipSUFKmq/GmBtb+/P8XFxSQlJZGYmEhSUhIymYxXX3211WO8vb2ZPHlyq9uFPx8RiAmCIAiCIAiCIPwOWq2W3Nxcvb3KcnJypGoZW1tbvUGZr6/vU6t8/jcdOXKEq1evtni9Q4cOPP/882RnZ3Ps2DGOHTvGqVOnqKiowM7OjsGDB0sBmaur6791zrKyMrZu3cq6deu4ffs2np6eDBs2jPz8fA4ePNisabqpqSkvvfRSs4b4hoaGDBs2jB9//JFVq1YREBDAZ599xk8//cTevXuZPHkyq1atalbVVl1dzciRI6WG+AMGDODAgQPN3oeFCxfy7bffolar+fLLL1m0aBHQGOg4OTmh0Wiorq7mo48+4u2332bevHkcOHCAyspKKioqeO655zhw4AD/+te/WlQYNTQ0sG/fPj7//HNGjRqFi4sLhYWFGBoa0rFjRy5fvoylpSX19fVs376dNm3aMHbsWMrKyvD09CQhIYFRo0Zx5swZXn755VZ7X6Wnp7Nnzx4MDAyYO3cuixYtory8nI8++ohffvlF6qMVGhrKu+++i4mJCcuWLePcuXOoVCqgMSQaM2YMhoaGHDlyhNjYWORyOc7OzgQGBhIQECD1ZevTpw+DBw9uETqVlpYSHR3NhQsXkMlkUm+vpimnnp6eWFhYkJuby+XLl7ly5QoPHz5EJpMRFBREaGgovXr1IjQ0lICAAORyOTqdjj179nDnzp0W921oaMidO3dISEjgzp070n2OGDGC7t27t/pZXLduHSUlJQQEBBAYGEhgYCDt27cnJyeHyspKvccMGzasRaWp8OcmAjFBEARBEARBEIT/UG1tLampqXrDsoqKCqBxCriXl5fesMzJyUnv1K//KTqdjsuXLxMXF0dZWRmWlpZS8/Unq8DUajVXrlzh6NGjHDt2jGvXrqHT6ejQoQPh4eFERETQt29fjI2Nf/e5r1y5wrp169i5cycqlYrBgwdjbW3NkSNHmoVK7u7uuLi4oFarSU5OZsKECbz//vtST62YmBgmTZpEz549ee+997CysuLHH39k4MCBQOO0voSEBA4cOMCVK1dISkpixIgR7Ny5UwpzysvL8ff3p6SkBGNjY8rLyzEwMECn0zFx4kSysrKws7OjQ4cOeHl5kZWVRXl5OSqViokTJ7JgwQLS0tKIjo7mueeeo0uXLlhaWkrX9Pnnn/Ppp5/S0NBASUkJBgYGBAQEcOvWLczMzDA0NOTAgQNcv36dN954gzZt2lBcXIyFhQX29vbcvHkT+H+rKz/5OXnw4
AG7du1izpw5zJ07l9OnT/PJJ5+QnJwMgLm5OdOnT2fkyJFs2LCBw4cPS8/Yzc1NWkHx6NGjUmWVmZkZ5eXlKBQKwsLCpH5gT06FLCgo4Pz589IUyNu3b2Nra8vcuXNRKpUt3vszZ85w7tw57OzsCA0Nlb569uzZbDprk7q6OpKSkkhISCAlJQWtVouBgQEFBQXExMRw79496e9PU5VYUVER9+/fZ/r06XoDaFNTU0aNGoW3t3eLQC8jI4Nt27a1WNHSzc2Nl19++U87TVrQTwRigiAIgiAIgiAI/0OaVjHUF5RlZmZKv3hbWlrqDcr8/Pz+7UVE1Go1cXFxJCYmolKpcHd3p3fv3q32gmqaJvd7lZSUcPz4cc6dO0dWVhalpaVkZ2fTo0cPqXrM39//dwV85eXlbNu2jXXr1pGQkIC7uztdu3aV+no9SalUotPpmDVrFn//+985ceIES5YsQa1Ws2jRIilw+dvf/kZ4eDhXrlxpdnxxcTFbtmzh+eefZ+PGjdJ9//TTT0ydOhWAt956ixdeeIGrV69KK0v+Fq1WS1VVFQcOHCArK4sFCxawe/du2rdvj0ajITY2lurqamQyGd7e3qSnp2NsbIyjoyO7d+9m2bJl7Nu3Dz8/P1JTU6Vn0PTrupubG8899xznz5+nR48euLu7o1KpSE9Pp2vXrowePZpvvvmGzZs3U11dDUCnTp2YP38+N2/eZPfu3dJqkU5OTjz77LPY2dmRn5/P1atXyczMRC6XU1tbi42NDcOHD9c7FfL+/ftSA/zz58+TkpICgJ+fHyEhITg4OGBoaNjqZ1YulzNmzBgCAgKafT50Oh05OTncunWLmzdvcvv2baqqqlCr1Tx8+JBbt25hb2+Pq6srZmZmaDQaSktLycjIkO7XzMyM9u3bSxVfTQslNFXAQWNPwAkTJjy1N15eXh4XL17k3r17GBkZ4e/vT+/evcUKk39BIhATBEEQBEEQBEH4P1BfX096erresOzBgwdAY8Pwtm3bSs38H/9yc3NrETppNBq2bNnSYsVEpVLJ5MmTadOmzX983SqVip9//pnU1NRmr6elpbF7925UKhVt27aVwrFBgwb95mqdOp2Oq1evsn79erZv305NTQ0hISFUVVVx+/btFvsbGBhgYGDA/PnzmT17Nl999RXr16+nc+fO9O/fnxMnTjB27Fi957p79y7bt2/ntdde4+uvv0Ymk6HT6ejXrx9yuZw+ffpITfP/Kx48eEBDQwM//PADL7/8Mt9//70UfLq5uZGbm4tSqSQwMJDPPvuMV199laKiIhQKhfR+NjXpVyqVjB49mnPnzlFaWkrbtm3JyMjA3d2dN998E09PT5YuXUpsbCwAxsbGjBkzBgcHB/bv309mZiYAJiYmdO7cGVdXVzw9PVtUY+Xk5GBra0tkZCS9e/dGoVCg0+nIzMyUwq9z585J47Vv356goCAsLCwoLy8nISGB9PR0AObMmYOLi0urz2fKlCkUFhZy8+bNZl/l5eVAYyXc888/3+w9qK+vZ8+ePaSlpWFhYSGFXkFBQdKf27Rp0yLYra+vJzk5maqqKhwdHfHx8fld4a9Op+P69etcuHCBsrIyDAwMCAwMZMiQIVhYWPzm8cKfgwjEBEEQBEEQBEEQ/mBKSkqkZv6PB2Xp6eloNBqgsSKmXbt2zUIyIyMjbt26pXdMJycn5syZ8x9PzTx06BDx8fF6t40dO5bMzEyp/1hKSgoGBgaEhIRIAVn37t2f2py/srKSHTt2sH79eq5evYqjoyOurq7cunVL78qBxsbGLFiwgEGDBvH2228THx/PwoULW60C0ul0rFixgsrKSt59910+/fRTioqK2LFjx2+uFvnvqKysZPXq1ajVarRaLXZ2dpSWliKXyxkwYAARERG8++67WFlZUVJSgouLC/n5+dLxoaGh5Obmkpubi4uLC7m5ufj6+vL6669TWFjId999JwWnvr6+hIWFcfXqVW7fvo1Op0OhU
ODn54etrS3x8fHU1tYyadKkVleAjIyMxMzMrFkF2P3793F1dWXIkCG4urpKoV1tbS3FxcXcuHGDiooKjIyMqKqqIjc3l/Hjx+Pn59fqc1mxYgUVFRXI5XJcXV2xtrZGLpdTUVFBUVER8+bNk3qPPc7AwIDx48fj7e39b32G1Wo1Go0GY2Pj333c5cuXOXbsWIvXbW1tmT179n8UmAp/HCIQEwRBEARBEARB+JNQq9VkZmbqrSorKipiypQpeHt7t3r8/Pnz/6NVMOvr61m+fLneYAogMDCQF198Ufo5KyuL48ePc+zYMU6ePElFRQW2trbNmvO7ubm1er74+HjWr1/Ptm3bqKysxM/Pj/v371NbW9tiX1NTUxYvXoylpSWpqalPbfqfl5fH+vXr0el0LF26FK1W2+o9/SfKysrYsmULarVaatb+4osvolar2b9/PxYWFqhUKjQajXR+JycnTE1NyczMxN7enpKSEjp06MC4ceM4c+YMZ86cQavVolAoCAkJoa6ujoSEBDQaDTKZDHd3dywtLUlOTqahoQGFQoFGo8HR0ZG5c+e2Ggrl5uayfv16DAwMaNeuHR06dMDd3f2p0wsBampqyM3Npbq6GhMTE0xMTGjbtq3efQsLCzlw4ABFRUVSsGtnZydVevn6+rZYmOBxw4cPp0ePHr/53KHx2R8/fpyUlBR0Oh329vYMGDCAoKCgpx7XtLhCfX293u3h4eGEhob+rmsQ/thEICYIgiAIgiAIgvAXUFZWxk8//fTUKqc1a9ZI08e8vLzo1KkTvXr1YuDAga32GHtccXExa9asaXW7gYEBQ4YMoXPnzi2a7Dc152+qHrt69Wqz5vzh4eGEhYXpbc5fXV3Nrl27WL9+vbQyo4GBAWVlZS32NTc3Z+HCha1WoRkYGLB48WL+9re/ERcXx8iRI/9HFzSoqqrixx9/pKSkhClTpnD+/Hny8/Opr6/HxMRECvcUCgWurq7k5ORgaWlJRUUF3bt3p0uXLkRFRZGbmws0BmYODg6kp6dLx9rZ2WFsbCzt0zQN1MfHh9GjR9O3b18ePHjQYirt4yoqKvj1119JT09n4MCB9OrV69+6z6KiInbu3IlSqSQ8PLxFMFtTU8Pt27fx8vKSwi9TU1PKyspIS0sjPT2d6upqPD09Wz1H3759GTRo0G9eS1VVFWvXrtUbro0ePZpOnTpJP2u1WkpKSsjPzyc/P5/MzMyn9o5r164d48eP/81rEP74RCAmCIIgCIIgCILwF3Hq1CkuXLigd5tOpyMuLo7U1FQKCgqoqqri8V8HDQwMsLKywtXVFX9/f7p160a/fv3o2bOntGJgXV0dy5cvb7EKX5OUlBR27NiBQqGgU6dO9OjRQ/oKDAxstkpfSUkJJ0+elAKy/Px8TExM6N+/vxSQPdl8HeDmzZusX7+eLVu2UF5ejo2NTYtgzNnZmVmzZukNxWJjY+nXrx8jRoxgz549/yure5aXl5Ofn8/PP/+MQqFApVI1e4aOjo4UFRVhamoq9U8zMjIiJiYGtVrNMzIZp1v51T0UiH0UgBkYGNCrVy/Cw8OxMzBgwkcfYVVfzxhgn1zOG2+8ITWHN9BoeObMGTolJGBcV0eWhQVvazSo+vene/fu/+8EWi3drl2j+9Wr2D54gMrQkHwXF87368f9xyrBPDIzeXnzZr3XuHPBAjKdnEhPT2/8Sk3l2dxc5gC+QDWQam7O6T590D0WxD1tTGJioJVKraf9PdBoNCQkJEgBWGFhoVStBo193mbNmqX/nDT2T2utP53w5yICMUEQBEEQBEEQhL+IyspK1q5dK62897hnn322WdCh0+lIS0vj9OnTxMbGkpiYSE5ODqWlpc1W5oPGPl329vZ4enrSp0+fVlcRHDt2LFVVVcTFxUlfSUlJ6HQ6TE1NCQ4ObhaS+fr6StVMt2/flsKx8+fPo1KpaNOmTbPm/DY2NtK5ampq+Pnnn1m/fj0XLlzAyMgIlUolhXzt2
7dn5MiRza41OTmZAwcOoFQqee2116Sg739Deno6W7dubRZCNgVgRkZG1NfX06lTJ8rKyqQVNk1MTFAqlQRXVHAWWCWXE29ggFqtRgbogGhTU7oOHYqHhwdZWVlcvnyZwsJCvgamA+bAy+bmRJma0q5dOwYPHgzA8z//TGBSEldCQym2tsbj7Fk61Nby08svc8/DQ7rGoceO0SsmhoROnchp2xbjujq6XbuGVXk5m6ZPJ8/dHfh/4dXFbt04WV5OYWEhFY+miR4FGqyt8fX1xcfHh1ezsuh75QqFQ4diNGgQVjodsnXr0OXksGfBAu48alzfNOaVkBAe+PgwZMgQtDodDx8+JMvfn3u1teTl5ZGZmUlmZia5ubkUFhYSERHx1CmzN2/exMbGBhcXl2Zfrq6uODg48N1330lTXJ/03HPP0blz5//CJ0D4oxGBmCAIgiAIgiAIwl9IaWkpUVFR0qp/VlZW9O/fn+Dg4N89Rl1dHRcvXiQ6Oprr16+TmppKXl4elZWVKJVKxo8f32xqm1arJT8/H1dXV/r27Uvfvn2lqY9VVVXEx8c3C8kyMjKAxql+Q4cOxdfXFw8PD5555hm8vb2pqanh3LlzUkCWnJyMXC5v1py/R48eUgVYUlIS69evZ/PmzdKqgA0NDSiVSnx8fDAyMuLBgwc4Ojpy7do1Xn755adOzfufcujQIa5du4ZcLpf6gGk0Gjw9PSkoKKCurg6g2VRKuVzOAOCUVssY4BfA1dWV4OBgqqqqSE5OprCwUDqHXC6nu7ExF2pq+Bj4BJigVLJbq0Wr1TJgwABe9PBg7o8/cnzIEKKCgjh06BAFWVlkWVhQa27OppkzAZA1NPD3f/6Tu+3a8fNjVVHWZWUs+PprroSEcHTYMOD/hVe7X3yRcw4OODs7SwGYr68vtra2jQdrNGBpCc8+C3v2AI3hbNWtW1h07kzWyJHsCA1FpVJJY/4UGckXGRmkp6dLz+hppk+f3mofM4DXX3+9Wbj6pOTkZHbv3s2TcYmXlxeTJk36XStVCn98IhATBEEQBEEQBEH4C6qtrUWtVmNhYfHfOi3w3r17nDp1ihs3blBWVkZJSQnx8fEUFBQ028/Q0BA7Ozvatm1LYGAgISEhDBw4EF9fXx48eEB0dDQJCQnNjqmvr+fUqVM4OztLVWTdu3enqqpKCsdOnTolTZUcMmQI4eHhDB06FHd3d+rq6ti7dy9r167l/PnzUvXZ43r06MGzzz773/Y8/h319fWsXr2a6upqdDodNjY20kqRcrkcnU4nXe/j1z4AOAMsbtuWX6qquP/gAU1LAMjlcmkaZtPPJ7RaSoA1wFlguoUF2+rrpX1WKBS8ptHQ0dWVlEcrWz7zzDO8I5Mx6NQpvlq4kAorKxRqNe8uXUpsjx5EPfbMlCoVb//zn1zq3ZtTQ4YAzQOx+gEDmDx7NjoDA0pLS6XpiTk5OWTducOnK1bwi7Mzb5ub8+DBAyoqKjDUaKgAlgPvyGR4enoyRKlk7d27jJXJOAYoLS2xsrPD0dERNzc3PD098fLywsXFBWdnZ5ydnXFyciIhIYETJ07ofQ9+72qrOTk5HD9+nMLCQgwMDPD29mbkyJF6e9wJf04iEBMEQRAEQRAEQfgLys/PJyMjA5lMhp+f3+9qmv+fUKvVXL16lbNnzxIfH09KSgq5ubmUl5c3W8FRJpNhbm7OzJkz9a5gqNPpiI2NJSYmRlogwNvbWwrIunbtikajITo6mmPHjhEXF4dOpyMoKKhZc/6cnBw2bNjAhg0bmi00MH/+/P/xZ/E058+f58KFC1I4pS+0A1Aqldja2lJfX0/nhw85C1QCFoAGiAbeBOIfO97Y2JjntVo2qFS0BzxpDMSaKsvkcjkODg7sq6rCqaGBUY/CyYKCAmbMmEHf+nqm/PQTO8aP566/PwAz1q/HsaiIw5GRZD+aMtnv/Hm8MjNZP2sWDx9VfjUFYvWGhhipVM2u8doT9xYDdABeAeKMj
WljYcESlYqQmho+Hz0ayy5d8PT0JKCggOBFi9CZmyOrqgIDAwgLo/of/6CwTRvMzc1xcHBoEW6p1Wp++OEH8h+FfU0UCgWTJk3C47Epofo0NDSwe/du7t692+x1JycnpkyZIvVhE/7cRCAmCIIgCIIgCILwF6LVajl48GCL6quePXsSERHxv9JE/kklJSWcPn2amJgYbt68SU1NDREREa3un5iYiKGhIZ6enpiZmVFQUMC1a9fQXr3K2/X19AVMZDJKraxI6dePKyEhpKSkcPLkSTzy8viXTEZXmYwGMzNqnn2W8xERfLNpE9nZ2UybNg2HoiL6nz2La14e5lVVqJVKih0cuNSnjxQEPS7w9m16xcRgX1KCVi6nyNGRS336kNqunbSPeUUFQ06cwDUvD4vKSnQyGaV2dsT17ElC587w6LlXVVWRsWIFs7VaOgJ2QDFwGViqUJCiVErTJfvTGGi1Rg30BG48+tkYSAa2A8ttbBiiVLKrqIiXzczY/FhfuUSZjEozM74YOhQfHx88PT0pLi7GvqiI+WvWcHjECK496jdnU1rKmJ9/xvWxcOmBjQ3bJ06k1N5ees09J4deMTGk+vlRY2pK/fXrjExNxQx4f+BACA7Gx8cHpVKJ+s4dIrdtazYm3t7w66/w+PO/dAlWrIDhw8HeHs2tWzQsW4a8tpZNM2ZQ4OKCm5sbzz33HPaPXQs0VuPFxMSQmJiISqWibdu29O3bFycnp6c80UYxMTEcP35c77YuXbowatSo3xxD+OMTgZggCIIgCIIgCMJfSHR0NKdPn9a7bcSIEXTr1u13j1VRUUF8fDzFxcWYm5vTpUsXXFxc/uNrjI+P59ChQ61uj4uL48iRI81eizQyYk99PalmZkS7ulKrUGBdUsKDkhLe1OkwNDRkjJ8fPyQnk2thwR4bGxqys1mo1RJjZMS2SZPw8fFBpVLhe/cuIVeucL9NGyotLFCq1bRPSsIjJ4dDI0YQ/9jiAz2vXGFYVBR3/fy4264dCo2GLjdu4FxYyK6xY0kODATAsaCAYVFR3GvblnIrK+QNDXhnZBCQkkJ0376cftTMHsDu22+xLy7mOvBAJsNRp2M64AL0Am4+2s8RGAJSA/0mk4FwoA6INjBgrIUF9fX1vFVbyyygHWBka8tYR0e+S05m66hR5PbqhZGRETU1NcxetoxsExPmPwrCjIyMeOmll7B+8IAF33zD0fBwrjxa7dGsqoohx49Tb2REprc35lVV9LlwAbVSyQ/TplFrZqb3PQwODmZkYCB06gT9+sHRo0RFRREbG9tiTE9jY3qeOYPM1BSio+GJcKvJnj17yI+OZu5335Ht4cG2yZMBsLCwYP78+RgZGbX6mdJHo9FQW1tLTU0NNTU10p/PnTtHVVWV3mMUCgVvv/223hVMhT8XxW/vIgiCIAiCIAiCIPwZ6HQ64uLiWt1+5coVunTp8rt+mc/KymLHjh3NVpyMjY0lPDyc0NDQ/+g6pQbrrXBxcSEoKEhqom4BrKuv5wgwproaXWqqtK9cLsfczAwLCwtmpqfzQKej88OHVD58iLm5OXWWlnyUl8f6/fspnzkTExMT0tq1I+2x6i6A2J49mb12Lb1iYloEYrmuruyYMEGq8roeHMyiFSvonJAgBWJFzs5snjat2ZhxISGM276dkCtXODNwILpHzdhP9u7NgQMHGnd6VKOyAbgPzH30BVAEbKNxWqVCocDAwIDAwEA6pqRwX6fjpkLB4MpKunbujLNazVuXL7PcwwMPE5PGlRizsgDYf+AAvzSdDxgJPKyo4EphIUqlknaPnoVCowFA82j1TVlDA5N/+olsT0+ihg+Xjs/w9mbet9/S59IlTj7qIfYklUoFvr4wahTs3UtZSQmxsbF6x0wGXCZPpu2wYei++AL1p582C6hqamooLi4mKSkJ7OxIDgig/Z07yLRadHI5lZWVfPrpp1RXV0v7P3m8vp/VarXea1+0aBGWlpZ6t2k0GlQqV
asrrQp/HiIQEwRBEARBEARB+IvQarVUVla2uv3+/fsoFAqMjY0xMzPD1NQUMzOzFl+mpqZ4eHjoDc6OHTtGbm4u9vb2zfZv+rOxsfFvTsv08PDA0dGRoqKiFtuMjIxYuXIlZmZm6HQ6SkpKqPryS5yXLaPkb39jemUlGbducSstjdKyMrRaLVVVVciqqugLfEVjry2Ampoavqyv5w1gWGUlGU8JMXRyOeVWVrjl5ja/nvp6Su3spDAMQGVsjMrQEI3it3+lLre2RqlWY9DQgOZRIObq6tpivyKgBrB+4nVzc3NMTU2xsrJixIgRtK+sxDU+nrMDBmClUmF46RJx586xGrgHbMrMRCaTYSKX4/DofG5KJb46HekaDTogH3B7NL5arZaCIYtHVVGVFhYAeGRn41RUxPHw8GbX9MDOjmIHB9rk5LR63ydOnGDZsmXMzcxkukrFlBdeoPvAga2O+cqXX/K5Wk3F8uWELV/eYrwOHTowZswYACosLVE0NKBUqVA9anKfmZlJfHw8pqammJiYYGpqiqmpKdbW1piYmGBpaYmZmRkKhQKlUomFhQXm5ubN9m069urVq+Tl5em9LysrK9FY/y9CBGKCIAiCIAiCIAh/EXK5HEtLSyoqKvRut7S0ZOPGjVRXV7f4qqmpobq6moqKClQqFd7e3q2eZ82aNa1Oy5TL5ZiYmOgN2h4P0CwsLLCyskL+KLRpOtbNzY2LFy8229/91i10FhZMDw9n9oIFcPcumJmhnTWLnIULycjLo/r4cZTLllEbGIhbeTnFxcWoVCqqtVpuAF3lcjKeuFalSoVCrca4vh7/lBT8UlO53aFDs32yPD0JTEqi55UrpDyaMhkSG4txXR1X9FTKKdRqlCoVhioVntnZdLl+nXtt2khVVwAODg4oFArMNBqUgKtMxgKdDivg1BPjVVVVUVVVxbBhw7CysqLTpUsA3OzYkcEnT6JWKHBwd6dtVhZ+QCY0Vp01NDR+AV+r1XwN9PD15SGQWVzMM+Xl+Dk58VCrlXqWud2/D0CBszMA5o/6jsm02hb3adDQgFzP603u3r3L1atXsQRqgfsPH9L9KWMqFApMFAq0BgYE+fpiYmLSLKx6vEeYTVkZaoUClaGh9NqUKVPYunWr3ms5e/Ys586dk35Wq9XU1NTwwgsv6A0nbWxs2Lx5s96xevfu/X/Sh0/47ycCMUEQBEEQBEEQhD84rVZLeno6eXl5mJiYEBgYiLm5eYv9ZDIZ3bt3bzWsGjJkyO/qIZaQkMD+/ftb3f60Vfa0Wq0UsjUxMDBALpcjk8lahAk+Pj7Y2dlRXl5OcnJysymaTW4AvgDDhvGdTMYlQ0MGqNXMWbeOm1u3sqRNGyLr64kEHpqY0M3XF3Nzc4yMjNBqtShPn8atpKTFuEOPHaP7tcY1ELUyGXfat282NRAgatgwTGtqGBYVxbCoKACqTU35aepU7rdp02LMkMuXGXzq/8VaGV5eHHjuuWb7yOVybG1tOVdURACATkcl8Amw8YnxZDIZPpaWeHl5IdNqCbp9m/tubhiq1finpJDq50eXoCDey8riyc5bHYBPgWU0rux4Iy0NzaNzzAZGFhby5aN987Oy6HLjBvfd3Kh4tPpnqZ1d4zi3b5Pu5yeN65yXh11pKfGPfZZMq6upeayf2FtvvcXqWbNoP3UqtQMGsOe779ixY0erY66fOxenI0dg5kxuf/dd44vFxfBoRdCGhga+/vprTFNTpfvmsTC1Y8eOLd4LaKyKfDwMa1JXV8fevXuZP39+i8+kp6cnzz//PFFRUVJYaGBgQFBQEBUVFZw7d4727dvj6Oio95zCn4MIxARBEARBEARBEP7Aqqqq2LZtGwUFBdJrx48fJzIyks6dO7fYv0+fPpSWlrZYZTIkJISuXbv+rnM6P6oQas3rr7/OypUrqa+vR6VSUV9fL339V3+uq6ujTZs21NbWSl91dXXU19djlZODWUMDG5VK3lYqUavV7Ho0/
e+VmhoWp6TQ9HQuXrtG3KOQq8lAoIOe6Y2XQ0NJCgzEorKSoMRE5FotBo+qqpqolUpK7e2psLTkbrt2GKlUhMbEMHbXLn6YNo2yRwFPk9sdO5Ln6opZTQ1+d+9iXlWFQk+vKqVSyTTAEvAGpgEmgAGgeWw/nU7Hj3V1OGzbRrWpKebV1RTb2zN940bUSiWnBg/GTqUirk0b8pVKDA0NMTIywtjYGM+6OkhIwCAkBCt/f6YZGaFUKlEoFCQcPcqytDRG9ezJA1tbgk+exPrhQw6OHCmdO9/VlXRvb7okJGBUX0+6jw8WVVX0vHIFjULB5ccq5Mbs2YNaqeR+mzZUm5lhd+IEfrGxqBQKdnTqxIO9ezEwMNA7plVNDVbLljXua2PDw6+/BuD5b79Fo1RS4OVFrYUFA/Ly6BATI913EzMzM6knW1O41RS+Pnz4sMWzb1JaWkpubi7u7u4ttnXs2JH27duTk5NDXV0dly9f5ubNm9L2s2fP0rt3bwYPHiwqxv6kxCqTgiAIgiAIgiAIf2Bbt24lPT29xesymYxXXnml1SqVgoIC0tPTkclktGvXrtmUs99j+/btpD7WvL6JhYUFU6ZMAZDCLH3ff+9rv2f/tZcu4V1Tw0w/P2KUSmlbcGUl+8vKmGVoSIVGwy6tljDgwhPXvAsYZGjI6nfeeeo9T/rpJ4zr6tgwa5bUM2zC1q1o5XJ2Tpgg7WdcU8Nrq1aR4e3NLy+++NQxRxw8iG9aGqtfe63ZtMkNGzZw/9EURWjsHXYH2Aq8+cQYi5RKFjo44FxYiEFDA9VmZqT7+HCuf3/K7Oy4cuUKx44da1aBJ5PJ6KfVcrKhgfEKBfsMDJoFN8bABxoN4zUarIFEuZwbL7zA/aCgZudWqNX0vnSJoNu3sSkro8HAgGwPD8488wyFj6042vPyZTreuoXtgwcY1tZSLJNxzsCAz42MyHhUyaXT6fDz8yO0c2eeS0ujQ2IituXl1Gm1XDIw4BOlkoRH96DT6Zir0TBOo8Fbp8MSKAYuGBnxa/fuaDw9qaqqIiEhgXv37knjN31v+vOLL75I0BP39Lhx48bh7+//1Pfw0KFDxMfH6902ZsyYp44v/HGJCjFBEARBEARBEIQ/qAcPHugNw6Dxl/6ff/4ZR0fH//YASqVSodVq6dOnT7NeYvn5+axevZrFixf/l+5HJpNhZGSEkZERhoaGUjWTvu9NfzY1NaXaygpqanDv1o0hTk7SNpfycli1ionDh5Pn6gpr1vDKyJF0btOGiooKysvLefjwIb5Xr1L2WBjVmqTAQCIPH8autJRSe3usHzzALy2NQ5GRzfarMzUlp21b2j6lqfzjY3aLj8cjO5t0X1/pdY1G02y/h8BpYCKwRCbDwMAAhUKBkZERO83MeNi/P2t++YUsT0+2TZ7c7NgBAwYQFhaGRqORmuSr1Wo0Gg3z1Wos1GrGP/r58e0nNBp+fexnV52O5h3UGlecPN+/P+f793/qfcaGhhIbGsr9+/fZtGkTWq0WNJrGr8dcv36d69ev892TAzzW86ypim2DiQmblUrpZ2XTn8vKUFZVoVQqcXFxoU2bNtK2J/f9rSB4z549GBoa6j1WqVQil8tJSUlp9fhr166JQOxPSgRigiAIgiAIgiAIf1BlZWVP3X7hwgV27Ngh/dwUOLUWNul7zdra+qn7K5VKDAwMMDIywtXVlT59+vxb4z++TfE7VmV8nE6n48GDB2jfegs2bqRX27bEW1uTn59PZmYmD+/cAeDHI0fYp1bzIpB78CDfAsbGxhgYGCBTq/FTqThpa/ub51M+Cm+M6uqA/6ypfGtjNnn8vW2q3DJ91Fhfp9Oh0WjQaDTU1dVRXl5O7a5dGGm13OrUSTquurqa/fv3k5qailwul94nU1NTbGxscHJywsvLi8DAQFxdXTEzM8PDw4NOnTq1+l4cPXqUK1eu/OZ96WNpacnbb7/Ne++9h0aj4f79+9y9e5eKigpMTExwcXHB2
tpaurfHg7sng7z/6s8ajYb6+nqqqqpQq9Xk5eXRu3dvDB9rwN8kNTWV48ePtxhL+9j7amVlxcKFC1u95/Ly8v/SsxL+74lATBAEQRAEQRAE4Q/K9jdCnBdeeIFvv/1WCp4MnpgW90eiUqm4ceMGRUVFWFhY4OrqSnl5OXl5eeTn50vfn/yzSqWiC3AdKPniC/5la4uLiwt2dnbMKSpCAyQ6OKCrrORUZSWTaGxOX/UogJoOWAB7dDqaJsaZVlVR88SiBPKGBjolJKBWKCh+1Mj9ga0tWpmMoMRErnXvLk2jtCgvp21ODjlt20rHP9lUvklwfDw6IP+x6YV1OTnU19dLP+t0Ojxo7HV29bFjZTIZxsbG2NraMquigtrqatYWFSG7fJny8nLu3r1LdXU1BgYGaLVaKdSpqqqiqKiIlJQUKioqaNOmjVRpePPmTTZt2sTWrVuprKyUQjQzMzNsbGxwdnYmODgYq0eN9X8vJycnJkyYgKWlJQCxsbGcPXtW2l5VVUVxcTFhYWEMGTLk3xq7tLSUy5cvk5ubi5GRER07dqRLly7NVih9mvz8fPbu3UvJYwsrdOzYkXfeeQelnsrBx59lXV0d69atQ62nDxyA3RM95IQ/DxGICYIgCIIgCIIg/EHZ2Njg6+tLWlpai20ymYx+/fphY2PTYlthYSHZ2dkolUratWuHmZ6g5n+CVquluLi4WaiVl5dHcXExFhYWzap01Go1v/zyC8nJyQDY29vj4uKCi4sLAQEBPPPMMzg7O1NfX09OTg7H9+9n4v37GFdUcPrBA/oDg4HPgKt5eQC8L5cTrdVySalkt5UVVpWVzK+v5xhwTCbDS6XC0NCQEYcPY1RfT46HBxUWFphXVdHx1i0cSko4NnQoaiMjAGrMzLgRHEzX+HimbN7MnfbtMVKp6B4Xh1Kt5kLfvtL9hJ0/T5t790j39aXcygqT2lraJyXhlpfHlZ49mzXff2vbNjrSuHpmGeAHzACUwNuPPU+dTkdtbS01ubn0AX4Bjl28KG03NjbGwcEBPz8/OnbsSI8ePejSpQtubm5kZmZy5coVCgoKWoSkDg4OvPDCC/zwww/NQrTCwkKSk5M5e/Ys/v7+9OrVC09Pz6e+57W1tRQWFpKTk0NtbS1BQUHY2Nhw7NgxvftHR0fTsWNHHB6Fjr8lJyeHrVu3NguksrKySElJ4aWXXvpdoZitrS3h4eGUlZVhbm6Oq6vrUwM/uVwuTe01Nzene/fuxMTE6N23Z8+ev+s+hD8e0VRfEARBEARBEAThD6y6uprt27eT9yj0AVAoFIwcOZKOHTs221ej0bBv3z6SkpKk1wwMDAgPD6dHjx7/5WtoaGigqKioRdD1ZEVXQUEBDU+s0ujk5MTkyZMxf6IiCxqDhzFjxmBnZ0d0dDQXL17k+vXr3L17l5KSElQq1f+7Z+AdGldjdAVyDQw42LYtl0NC0Ol0pKenk5CQQE+1muUyGcFAlUzGbp2Ot3Q6auRyxo0bR7t27Qi6dYvg69dxKizEpLYWlaEhea6uxPbsyd2AgGbXKGtooPvVqwRfv47tgwcA5Lm6cr5/f7K8vKT9vNPT6XnlCi75+ZhVV6NRKCh0ciK+a1cSunSRqssAnL//nq4FBfjQWL1WLJNxTqfjM+C2nuc/B/geiAR+lcuRy+U0NDTwtF/nDQ0NiYyMbPEZedzcuXOlRRnKy8u5ceMGt27d4s6dO6SlpZGfn49Wq8XPz482bdpI01Dr6uooKSkhPj6e5OTkZlMMAYKDgxk1alSr501JSaGyshIPDw/8/Pzw8fHBwcEBe3t77OzssLKykhrrf/fddxQXF+sdZ/To0XR6bAqpPjExMZw9e1b6LJmYmDB06FC6dOny1OMep9FoOHDgALdv/793Ry6XM2TIEEIfW2lT+HMRgZggCIIgCIIgCMIfnE6nIyMjg7y8PExNTWnfvj2mpqYt9nta/6epU6e2qPbRaDQUFha2GnQVFBRgZmaGn
Z0dWq2WlJQUqaLLyclJquhydXVt9r3pz05OTty7d48tW7a0em+nTp0iOjpa77amvmWdO3dm4MCBWFhYUFJSwo0bN7h06RKZmZkAuLu74+Xl1ew5NfXUaqosaqr0GTBgwG897v9RdXV1rFixolnY18TU1BQvLy+MjY1JTEyk7om+Y03kcjkKhaLZGKamphgYGEgLIwC89NJLtG/fvtVrcXJyonPnzvj6+mJvb/9vTbfV6XTk5+dz/fp1EhMTSU1NJTMzk/z8fFxcXAgLC2v12OjoaE6dOvXU8Q0MDHB1dWXGjBmt7uPv78+4ceNa3X7jxg0OHDigd9vEiRPxfWyRg9aoVCqio6O5efMm1dXVWFpa4ufnR1hYmN6QV/jzEIGYIAiCIAiCIAjCX4BarWb58uWt9jrS6XTcv3+/WehVVFTUrMpILpfj5OSEq6sr7u7uBAUFtWhG3qZNGyZMmICxsbHe89TX15OWlkZycjIpKSnk5OTg8lj/rCfFxcVx5swZ2rZtS0hICH379iUoKAhXV1eSk5O5dOkSly5d4vLly1RVVaFQKAgODiYgIACdTkdWVhaxsbGoVCosLCyaBUKWlpYEBwfT0NDAxUdTDRcsWIC1tfW/82j/W125coWoqKgWrzdVRDVRKpW0b9+egQMHUlRUxNmzZ5tVCT5JqVRiYmKCTqejsrISgKFDh9K7d+9Wj/nmm2948KjqzdjYGHd3d9q1a0enTp0ICAjA19cXPz8/HBwc/q2wrKCggLVr17a6vUePHlRWVpKdnS19FRYWUlpaSkVFBbW1tahUKpydnZk5c2ar42RkZLBnzx5MTEywsLDA2toaW1tbHB0dcXFxwcbGptUqOi8vL6ZMmfLU+2hoaGDz5s3cu3evxbaRI0cSHBz81OOFPzYRiAmCIAiCIAiCIPwFlJaWsnr16la3P3jwgJiYGL2VXE3fHRwcMDAwAODw4cNcu3ZN71iDBw/G19dXqhhLSUkhMTGRxMREcnNzpRBCLpfj5ub21Cqffv36MWDAANLT06Xw69KlS9y+fRudToednR29e/emW7duKBQKMjIyOHnyJDk5ORgZGWFvb09ZWRk1NTUAWFhYMGjQIAICAti3bx8pKSnNwqZevXoRHh7+X3rG/6mGhgY2bNhAfn6+3u3GxsbU1dWhUCiaTYmUyWR4eHgwefJkunXrRkxMDMePH+fmzZstpqg+rqnCquk9fVx+fj6xsbHU1dVRVlZGSUkJmkcrYuq7rjZt2tC+fXuCgoLw8/PDz88PX19fnJyc9IZle/bsaTZ1t0nbtm15+eWXf1fAplKpWLFiRbMFCB6Xnp7OtWvXKC8vp6qqirq6OtRqNVqtFoVCwXvvvdfq2BqNhoKCAuzt7Vv9yszMbLXCzMTEhIULF+ptyi/8OYhATBAEQRAEQRAE4S+gvr6eL774okU/pyZ+fn5MmDDhd42l1Wr5/PPPW602KykpaRa+KRSKZmGKo6MjHTp0oHPnzgQFBVFRUUFFRYXesW7evEl0dLTUJyowMJA+ffrQq1cv7O3tSUpK4ujRo1y4cAGNRoOHhwcmJibcv3+fqqoqoDEEGzp0KBMnTiQ6OpoNGzZQWVmJTCZDJpM1eyZKpZJp06bh6ur6u57Ff6czZ85w7tw5AKysrCgvL9e7X9MiCNXV1ZiYmKBWq5s9XxsbG0aOHMl7771HaWkpFy5c4NSpU1y8eLHFc/b392f06NHNKvqa+n8VFxeTkZEhhWru7u60adMGS0tLtFotJSUl3L9/n+LiYhQKBf7+/lhbW1NSUkJqaiparRZjY2M8PDwIDAyUqsp8fX3x8vIiMTGR+Ph41Go1BgYGdOzYkfDw8FarC/Vpujd9z2ju3Ll6F4zQaDTcv3+frVu3thoY1tbWcvr0aUpKSigpKZGq6h734osvEhQU1Oq16ZuGLPx5iEBMEARBEARBEAThL+LAgQPcuHFD77Zx48bh7+//u8apr6/n888/b3V7dXU1y5cvx
8XFhY4dO9KxY0eCgoIIDAykffv2WFpaNts/NTWVvXv3NuuJVVlZyaFDh3B3d6d379707t2bwMBArl69ytGjRzl69Cj379/HxMSE7t27U19fT3JyshT4mJubEx4ezhtvvEF5eTmrVq3iyJEjKJVK1Go1hoaGzXpsyWQybGxsKCsrk6qt9FVO/U/Jz89n/fr1zcI5IyMjHBwcuH//vt5j5HI5Li4uPHjwgNraWuzs7KipqaG2tlbax9jYmJCQED788EP69+9PWloaFy5cIDo6mjNnzpCdnY2RkREBAQGYmJiQn59Pdna2dLyBgQEuLi60bdsWW1tbqqurSUlJkaZnGhoa0rt3b/r06dOsGqqqqordu3eTk5PTbKzHAygjIyN8fX3p2LGj1EC/KTBzdXX9XVViOp2OmJgYLl68KFUBenp68uyzz2Jvb//UY48cOcLVq1f1bnvmmWfo16+f9HN9fT2lpaVSQFZSUkJGRkar1WkgArE/OxGICYIgCIIgCIIg/EXU1dWxffv2Fj2PwsLCeOaZZ/QGEFVVVdy5c4ekpCQSExOl7yNHjsTW1lbveezt7ZkyZQoWFhYttjU0NHDr1q1m0x8zMzORyWSEhIQQHByMj48P/fr1o0uXLiQlJREVFUVUVBSXLl1Co9EQEBBAt27dKCgo4Nq1azx8+BBorApqCsGCgoL46aefWLVqFXfv3sXU1JSamhrMzMyorq5udk0eHh5kZ2fj5OREUVERnTp1wtTU9H9t6mRdXR2bNm2iuLhYb08rR0dHbGxsSElJaXUMa2trXF1dSUtLQ6VS4eXlRXl5OWVlZc2mqPr5+fHaa68xY8YMjI2NKSoq4tKlS1y4cIGzZ89y/fp1tFotcrkcQG9FoZGREV5eXrRr167FVNrH1dTU8ODBA+zs7FAoFNTU1JCenk5iYiLZ2dlSVduTCxw0ncPHxwd/f38pKGv67urqKl1fk4aGBsrKyjAyMtL7udOnrq6On376qcUUVS8vLyZMmIBCoXjq8devX+fgwYN6txkbG7No0SIxZfJPTARigiAIgiAIgiAIf2Dl5eVcu3aN4uJiqUm8s7Nzq/vrdDrS0tLIycmRGrM7ODhQVVVFUlJSi+Dr8WohT09PAgMDCQoKok2bNlLD9SdNnjwZb29vAB4+fMiVK1e4dOkSFy9e5MqVK1Lz+65du0rVX71798bNzY2HDx9y4sQJoqKiOHr0KPn5+ZiZmTFw4ECCgoJITk7m7NmzzUKwoUOHMmXKFBoaGnjw4AGpqan88MMPlJaWYmZmRlVVFba2ti2ut23btuh0OgoLC3F2diY3N5dhw4Zx+PBhoLFKqH///v/J2/ObVCoVW7dulSqpTExMqK2tbVZN1dTjLCgoCFNTU+Li4pqN8XgPNJlMRlBQEEqlkoSEBORyOZ06daKsrIx79+61mLr60ksv8fbbb0tTRGtqaoiLi5OqyC5evEhVVZXelSsBunXrRmRkZKv3d+7cOc6dOycFa97e3lLFoIuLC4aGhlRWVpKenk56ejqVlZWUlpaSlZWFSqVCqVQil8tRqVTSPRobG+Pj49MiKPP19cXd3b1FWPY0Go2GW7dukZGRIQWGgYGBv2sMjUbDDz/8oHcxg2effZbu3bv/7usQ/nhEICYIgiAIgiAIgvAHlZ6ezs6dO1s0Ox82bBg9e/bUe0xlZSV37txpFnolJiY2m9rm6elJUFCQNM0xKCiIgIAAzM3NpX10Op0UmjRV9hgbGxMcHEx+fr5U/ZWYmIhOp8Pe3r5Z+NW9e3dp1cMbN25IVWAxMTE0NDQQGBhIREQE3t7eXLx4kV9//VXqqWVqasrQoUNZtGgRffr04cCBA9y8ebPZfVZVVfHjjz+iVCpbVADZ2NjQvXt3Tpw4QUBAALm5udjY2NC1a1f2798vBUwymYxZs2b9j/UTU6lUbNmyhdLSUmpqaqTzPt4/TC6XS2FS0zTPIUOGoNVq9fbOejwcs7Ozo0ePHhQWFnL9+
nVMTU3p27cvDx8+JDExsVmlnJmZGQMGDODdd98lNDRUqhZsaGggMTGRCxcucPHiRaKjo6UKQ1NTU8LCwujVq1er91haWoq3tze2traoVCqSk5O5efMmt27doqCgAGj83Lz44ot4e3tL5zUwMMDBwYGCggLu3r3LnTt3SElJkaZFymQyTExMgMZ+Xzqdjnbt2hEcHIydnZ10fY8HZ+7u7q1Og83JyeHChQvk5uZibGxMhw4d6NOnT4tVVJ9UW1vLqVOnSEpKora2FhcXF/r27UtgYOBTjxP++EQgJgiCIAiCIAiC8Aek0WhYuXJli+l/0BgWTJ06lYKCgmbBV1JSUrPgy8vLSwq+3N3dsbOzo3PnzrRv3/539W+qra3l8uXLXL58maSkJE6ePCmFHEFBQc0CMD8/P2nMsrIyjh8/TlRUFMeOHaOgoABzc3MGDx5MeHg4zs7O/Pzzzxw+fFgKhkxMTKQQrG/fvsjlch4+fMjatWub9R57XFZWFps3b5YCIkNDQ8aMGcPZs2cpLy+nS5cuXLx4kdGjR1NaWsr58+elyiyZTMaQIUOoqamhoKCAkSNHtuh99p9IT0/n4MGDhISEcPz4cal/WVMAZmpqSkNDA/X19Zibm0sLBDQ9C7VazaRJk3j48CEHDx5sMbWxqVda0zPv1asXXl5e3Lhxg8TERGxtbYmIiEClUhEdHU1hYaF0rEKhoEOHDixcuJCXXnoJIyOjZmPn5ORw8eJFLl68SG5uLl26dGn1Pn/55RcSExOllR07depEjx496NGjBz4+PqjVauLj4/W+hzqdjoSEBDw8POjYsSMdOnTA3t6ee/fukZKSQkpKCnfv3iU5OZmAgAB69OjR7HiVSsW2bdukKkcjIyO8vb1bVJUZGRlx9uzZFtNV3d3dmTp1aqtTJ2/cuMH58+cpKytDJpPh5+dHREQENjY2rT4P4c9DBGKCIAiCIAiCIAh/QMnJyezatavV7U0rFspkMin4aqr2alrxz8zMjKtXrxIVFdUsUGnbti3jxo2TKnCaPF75dfHiRWmFQDMzM0JCQqTwKzQ0tFkooNVqiY+P5+jRo0RFRXH58mW0Wi0dOnRg2LBhhIeHo1QqWbt2LYcOHZJW9GsKwRYsWED//v2laWy3b99m9erVbNmyheeee4527dq1+hy++uorKioqeO6555DL5fzyyy/07duXgoICcnNz+cc//sGGDRtITU2VKrAMDAwIDQ1l8ODBfPTRR0Bjj66BAwfSqVOnf//Nekx9fT3Hjx/n2rVrQGMlVNM0zTZt2nD//n1p5UsDAwPc3d3Jzs5GoVCgVCqlhvlKpRKZTIaZmRnz588nKyuL7du3S+9jU6WYUqmkoaFBet3W1pbnnnsOpVLJ0aNHyc7OxtXVleeffx5TU1MOHDggrRDZxN3dncmTJ7NgwQKcnJya3U9tbS0rV65sMZUSGqfL7t69W2pID42hpKGhoRTwmZiYsHDhwlYrscrLyzly5AipqalS1Z6Pjw8dO3akU6dOdOzYEQcHB86cOdPq8StXrpTCLjMzM4yMjGhoaKCyshKdTsff/vY3rKys9B4fEhLC4MGDW4Ri165dk6bWPs7CwoJXXnkFU1NTveMJfx4iEBMEQRAEQRAEQfgDio+P59ChQ61ut7S0pH///gQEBLT6y/n9+/fZuHGj3m3t27enXbt2Uvh16dIlsrKygMYm9E0rC/bu3ZuOHTu2CAxKS0ubVYEVFRVhYWHBkCFDiIiIYOjQody7d4/vvvuuWQhmbGzM0KFDef311xkwYIA0xU2j0XDo0CG++eYbzp49i52dHXZ2doSFhdGmTZtWn0N0dDQjRozg008/RafTERkZyc6dO/H39+f9999nzpw5PHjwQGq6r1Ao6NixI1988QVDhw5Fp9Ph7e1NQUEBdXV1dOvWDWtra0JCQn6z6frjDA0NOXHiBDExMc2qoWQyGU5OTnTq1Injx48TGBhISkoKOp1OCqVCQkK4cuWK9
Owf7+tmZ2fHgwcP8PHx4Y033uDatWts2rQJrVaLTqeTKt7kcjkmJiZUV1dLVWh9+/ZlyJAh5Ofns3fvXoqKivD19WXcuHG0adOG7du3ExMT0yzssrS0JCIigiVLltC1a1dkMhmZmZns3r272X0ZGRlhYGAgrQBZVFQEgJOTE8bGxpSWllJVVYWFhQWLFy9u9bnZ2Njw+uuvU1NTQ1JSErdu3ZKmXN68eZPi4mJGjhxJ165dWx3jxRdfpLKysllVWdN3W1tb5syZ0+qxd+/eZc+ePXh5eUmVZT4+PlRUVLSYqtykf//+DBgwoNUxhT8HEYgJgiAIgiAIgiD8AeXn57Nu3bpWt48cOZLg4OCnjrF//34SEhL0btNqtXz55ZeoVKpmze979eqFm5tbi/3VajUXLlzg/PnzHD16lNjYWLRaLZ06dWLYsGEMGzaMkJAQYmNj+f777zl06JBUJWRsbMyQIUN49dVXGThwYLOgqaSkhA0bNvDdd9+Rk5NDQEAADQ0NpKamEhgYyIwZM6Qw7UlyuZy4uDgOHTrE6NGjqa+v59dff2X+/Pn069ePyZMno1KppJ5dhoaGeHt7c/jwYbp27UpFRQX29vaUlJRgYmLCxIkT2bBhA8bGxjg7O6NUKnF1dSUyMpL79+9LIZCJiYlUERUTE8O9e/fo0KEDo0ePZu7cuTg7O0tTS6FximJYWBh1dXXExMTQp08f4uLiUKvVKBQK1Go13bt3Jy0tjfLycqysrDA0NJRCJmis6svJyaFfv34sWbKEo0ePcvDgQZRKJXl5eWg0GqnXm42NDeXl5eh0OnQ6HZaWlkydOpUOHToQExPD3r17qaiooEuXLowfP57g4GB++uknDh06JE1hhcYqte7du7Nw4ULCw8NJTU2lvLwcOzs7AgMDpRUWmxZyuHDhgtSLrGnFTEdHR+bMmdNqby9TU1MmTJig9zMHUFhYyJ49e6Tnrc+ECRPw8/Nr8XpT5eKRI0daPbbpWaelpZGamkpaWhpVVVXMnDmz1WM8PDx4+eWXW90u/DmIQEwQBEEQBEEQBOEPasuWLWRkZLR43crKinnz5v1mQ/AffvihWU+xJ3Xv3p2BAwe2mDrZpLi4mGPHjhETE4O5ublUiVZVVYWvry8jRozA2dmZ6Oho1q5dy+HDh6UQzMjIiMGDB/Pqq68yaNAgKTxpcv36dVatWsWOHTvQarX06NGD+/fvk52dLYU+bm5ufPLJJwQEBOi915iYGG7dusWrr77KmjVrqK6uZuPGjaSmpvLWW28BjRVWpaWlmJiY4OTkRHR0NM8995w0pbFt27ZoNBrKy8sZMWIEu3btYvz48ezYsQMzMzMsLCz48ssvmThxonTeHTt2sHnzZuRyOceOHaOhoQGFQsGDBw9wc3Ojtra2RXWRTCZjyZIlHDx4kDt37jBq1CiOHz9ObW0tFhYWVFVV0bZtW9zc3Lh06RIAYWFhXLhwQZoOaGFhgbW1NXK5nIkTJ0rPRKPREBsby4kTJ6SADRqrvQwMDCgrK5MqyXr27Mn06dOxsLBg3759HD58GI1Gw6hRo+jVqxfBwcGcPXuWH3/8UWqu33T9Xl5ezJgxg9mzZ2Nvb9/q56rps9PUh6y8vLzVwOvAgQNcv34dd3d3QkJCCA0NJTQ0lK5du0qft4sXL3Ly5Em9x8vlchYuXNhsQYjHaTQavvrqK6lZ/5OGDBlC7969m71WWFjI999/3+q9+fj4MGnSpFa3C38Ov3+tUkEQBEEQBEEQBOF/1ZgxY/D392/2mqurK5MnT/7NMAx4avNvmUxG//79m4VhDQ0NXL58mQ8//JCePXvi5OTEmjVrcHR0bDYt09zcnNLSUpYsWYKVlRXPPPMMO3fuRKVSMXz4cA4fPkxFRQWHDx8mIiJCCsPUajW7du2ib9++dO3alePHj9O/f3+sra25dOkS3bp1IyYmhh9++IEdO3YQHBzMoUOH2Lp1a7MKoYaGBi5cuECbNm2YMmUK7
733Ht7e3lK1WFMY5ujoSGlpKebm5lhZWXHixAm++eYbKQx75plnyMvLo6ioiCVLlrBv3z5kMhkdOnRAJpNhaWlJ7969ycvLa/bsNBoNfn5+ZGZm0rVrVwwMDNBoNJw7d44333wTjUZDUFCQ1BMNGquoli1bxj/+8Q/c3d05cOAAkydPxsrKisrKSpydncnOzubmzZu88soryOVyLly4gI+Pj7SiYWVlJdXV1S3ef4VCQe/evaXVKaFxCmd1dTVlZWUYGRlJ006vXr3KK6+8wuzZs7G1tWXHjh188MEHdOzYkaqqKqKjo6mrq+Ojjz4iPz+fH374gS5duiCXy8nIyODdd9/FwcEBOzs7pk+fzs2bN1s0qwdwcHDgueeeY/ny5axcuRJHR8cW+9y4cYObN2/i5+eHi4sLycnJfPDBB4SFhWFpaUm3bt2YP38+qamprYa23bp1azUMa3o2AwcO1LvN1taWbt26tXjd0dHxqYFfQEBAq9uEPw9RISYIgiAIgiAIgvAHV1ZWRklJCRYWFjg5Of2uFSLh6T3EgoKCGDNmDIWFhRw7doyjR49y/PhxSktLsba2ZujQoURERFBYWEh9fb3eMc6fP090dDSDBw9m7ty5DB06tMWKhdBYcbN27Vq+//578vPzCQ0NxdHRkdOnT6NSqZg6dSqLFy/GysqKTz75hLVr16JQKKTzDh48mI0bN7J9+3Z++OEHDAwM+PDDD/n222+5ePEiH374IfPmzeP555/nwoULyGQyHB0dKSgowNraGplMxvnz5ykoKGDIkCHI5XICAwNJS0ujT58+ODo60rlzZ3755RcUCgW1tbXcuHEDIyMjli5dSlZWFqtXr5bu54cffqCyspI33niDuXPn8v3331NfX8/EiRNZu3YtdnZ2UtN7lUqFr68v3t7eQOMKjj/++CPDhw+ntLSU999/n3Xr1lFYWIivry9paWnIZDLeeOMNtm3bRkFBAQYGBkyaNIlt27YxaNAgQkJC9L4fTU35v/76a2pqaqTG/QqFgrq6OuRyOZ6enlRUVFBSUoKVlRVz587F2Ni4xVi5ubls2bKFyMhIxo8fz9ChQ7l8+TIrVqzgzJkzUvN/aKwG7NOnDwsWLGDYsGEtqgGhMcRMTEwkKysLhUKBv78/1dXVXLx4UZpm2dQ7ralSTi6Xk5+fT0ZGBvb29jz//PO4uroCjYFu586dGTFiBAYGBuh0OvLy8qTn165dO5ydnaXz3759m/Pnz1NcXIxCoSAoKIjBgwe3GqZlZmaybds2Ghoamr3etm1bJk+e/G/1lxP+mEQgJgiCIAiCIAiC8Bemb5VJa2trSkpKiIqKkqqlunXrxrBhw4iIiJAayt+7d49Nmza1OraZmRmvvvqq3kAF4MqVK6xatYrdu3djYmLCCy+8gFar5dChQzQ0NDBv3jxee+01TExMpEqipl5YOp0OPz8/tm/fjlKpZPr06dy4cYOFCxfSo0cP5s6di7m5Odu2bcPFxYWIiAiysrIwMjLC0tKSgoICHBwcqK2t5dSpU3h4eODl5UVtbS1GRkb07NkTT09PfHx8ml2zqakpn3zyCTU1NTQ0NHDp0iU+++yzZisOrlu3Dnd3d4YPH853333H3LlzpWPLy8tZtGgRq1atYtCgQbi5uUlhWJN79+6xcOFCQkJCqK6uZtWqVaxYsYLMzEyCgoJITEwEYMqUKVRVVbF3716gsaItODgYS0vLVt+ToKAgBg0axOrVq1mxYgWVlZVSBZuFhYXUI8zd3Z3Q0FA6dOjQ6liGhobs2rWLhIQErKyseP755xk/fjwDBgwgNTWVVatWtejvJZfLCQgIYObMmUydOhVbW1u9Y1dUVJCSkoJGo8HDwwNXV1fu3bsnTbO8cOECCQkJ6HQ67Ozs8Pf3x9zcHI1GQ0FBAenp6dTX1xMQEEBISAjt27dv1vQfoGvXrowYMaJZgKxSqVAoFM2q91pTUFDApUuXuHfvHkZGRnTo0IGQkBC9g
Z/w5yMCMUEQBEEQBEEQhL+49PR0fv31V5KTk4mOjubWrVvY2toydOhQhg0bRnh4OE5OTkBjYHDy5En27NnDuXPnmDp1aqvj+vn5MWHChGav1dfXs3v3blatWkVcXBxeXl6MHj0aU1PTZlU1nTt3ZsiQIaxZs4Z//vOfVFVVodPp0Gg02NjYsHr1akaPHs3SpUtZtmwZ7du359tvv2Xnzp2sWbOG0aNHs2HDBhITE4mMjKS6uhobGxvkcjmFhYW4uLjw4MEDoqKiCAsLo1OnTty5cweAOXPmcPXqVSIjI/XeV2JiItevXycnJ4eKigp69uzJzZs3gcbKpG+//ZYhQ4bg5+fH7t27GTt2rHTsqVOn6NixIy4uLgwbNozu3bvrPUdpaSkzZsygZ8+eqFQqdu7cyWeffcbNmzfp0KEDycnJUs+vcePG8dZbb9HQ0MDEiRNbBGyP++qrr+jRowdffvklnp6erFmzhn/961+UlZVhbGxMTU0NDg4OlJSU8Oyzz7Z6fdAYiM2YMYPS0lJ27NjB9u3bSU9Px8nJibFjxzJ+/HhCQ0MpLCxk8+bNrF+/noyMjGZTKJ2cnBgzZgzz58+nffv2AFy6dImTJ0822699+/Y8//zzzT4j5eXlXL58WQrILl++LAWanTp1wtnZGa1Wi5mZmTSt9ElNU0l/r6qqKtLS0mhoaMDLy6vVQE/48xOBmCAIgiAIgiAIwh9cQ0MDxcXFyGQyHBwcfrO6RaPREBMTw9GjR7l8+TJXrlyhpqaG7t27SytC9ujRQ1r5r76+nuPHj/Pzzz9z4MABysvL8ff358UXX8TW1paKigq95xk1ahRdunQBGqfYff/996xbt46ioiKGDBlCjx49uHr1KqGhoXqv+dq1axw5ckQKapRKJW+99Rbvv/8+cXFxzJw5k/T0dN577z1GjRrF5MmTuXv3Ll999RWvvPIKW7ZsYcaMGQDSVMCioiI8PT25f/8+e/fuJTIykjlz5rBu3ToUCgXh4eHEx8czduzYVnusabVaoqKiUCgUXLp0SQqQoLECauXKlcydOxcTExO++eYbVq5cSX5+PpWVlcybN49vv/2WiRMn0qZNm1Z7XxUUFNC1a1fatWsn9biKiorik08+4cKFC/j7+1NUVER5eTnOzs6sWrWKd999F4VCwZgxY/SOaW5ujq+vL0uWLCE1NZXp06fzySefYGlpyffff8/y5cspLi7G3NyciooKhg8fTs+ePVv5FMH27dvJyMhg1KhRzJo1i0GDBnH9+nV27NjBzp07yc/Px8vLi3HjxjFhwgQ6dOhAdXU1Bw8eZPXq1cTGxjZbXMDU1JTnnnuOdu3a6T1fSEgIERERrV6PWq3m+vXrUkB24cIFioqKWLhwIVZWVnqPKSoq4tChQ1Kz/pCQELp27ar3fblw4QJnzpxpVk3ZrVs3hg8f/rsqyoQ/F/GOCoIgCIIgCIIg/IElJCSwcuVKqQfXqlWrSElJabFfXl4emzZt4sUXX8TDw4N169ZhaGhIv379ePPNN9m+fTsXLlzgo48+IjQ0FLVazYEDB5g0aRKOjo6MHDmSq1evsmDBAm7dusWdO3f45JNPePHFF/VOEfP19aVjx45ER0czduxYPDw8WLlyJc8//zyffPIJ2dnZfPbZZ/j4+LQaJjQ1r6+pqeGFF14gPz+ft956izfeeIOwsDCsrKyIj4/HxcWFXr160dDQQFxcHHPmzOH9999n6tSpaLVaunfvzsOHDykqKsLf35/s7Gx++uknIiMj+fnnn6VnYWtri4uLCxUVFVIYqI9cLufhw4f07t2bhoaGZlMCZTIZarUahUKBl5cXqamp9O3bV2pyv2vXLrRaLR988EGrYRg0rhT66quvYmZmxi+//IJWq2XEiBH861//YuTIkaSkpGBlZYWPjw8FBQVMmDCBDz74gN69e3Pjxo0W41VWVvLPf/6TL774gvPnz/PNN9+wf/9+/Pz8+PLLL3nllVfIzMxk5cqVW
FhYIJPJuH37dovVMJuYmZlx8eJFvvzyS1JSUoiIiMDHx4eoqCgWLlzIvXv3OH36NIMGDeL777+nY8eOdOzYka+//prQ0FAuXrxIXV0dZ86c4cUXX8TMzIyampqn9r+7fv16q9cDjT3SevbsycKFC/nll18oKCiQnlNrnJ2dGTduHAUFBbz33nv07dsXS0tLevTowWuvvcbWrVtJS0sjKSmJU6dONQvDoDG0vXjxYqvjC39eIhATBEEQBEEQBEH4g7pz5w779++nqqpKeu3hw4fs2rWL9PR0zp07x9tvv03nzp1xc3Nj5syZ5ObmMnv2bHx9fZuNlZKSwu7du9m7dy8TJkyQVgFMSEhg8eLFJCYmkpiYyEcffSQFVdDYa+qVV16he/fuODs74+HhwdChQ6mtraV79+7069ePhIQEli5dyuLFi9m/fz8ffPABHTp0ICYm5qk9qoyMjAgNDSUxMZGff/6Zq1evEhQUxMaNG1mxYgVHjhzh448/Zvbs2UyePJm4uDh8fX0ZN24cn332GQDPPvssKSkplJSU0LlzZ1JSUvjuu+8YP348mZmZTJw4EZlMhkql4r333mPTpk08++yzUgN3fWpqakhPT6dXr14UFhai0+mk5yGTyaTQxs/Pj7S0NMLCwnjw4AHQOBXyypUrtGvXrtXFCABKSkqwtLRkzJgxhIWFsXbtWtRqNQMGDOCzzz5j2rRpZGVlUVlZyYABA6ivr2fChAm4uLjw0ksvsXfvXm7fvs3du3eRy+VYWlpSVlbGpUuXcHNzQ6VSkZqayty5c1m6dCnt2rVj165dzJ8/n/T0dNasWQPAzz//jFqtbnZt5eXlLF++nJCQEOzt7bl58yaXL19myJAhfPHFF3h6ejJy5EhKS0tZsWIFBQUFHDx4kA4dOrB06VK8vb3p1asXa9asoX379uzevZvKykqSkpKeOt1TpVJx+/btVrc/qal5vrW1dav7ODg4SCFheXk58fHxfPPNNwQGBnLixAkmT56Mn58fK1eubHWM2NhYvStpCn9uIhATBEEQBEEQBEH4g4qOjtb7uk6n45NPPmHAgAFs2rSJzp07s23bNoqKili3bl2r46WnpzNv3jySkpJ4++23uXPnDrdu3eKDDz5otQcTgK2tLc8++ywRERHcuXOHESNGMHv2bNzd3dmyZQsjRozg008/5Z///CeRkZHcuXOHX375hdDQ0FZX8Wty8uRJnJ2defnll4mIiKBdu3bcvn2bnj170q1bN06cOMGePXtYu3YtFRUVDBgwgH379qHT6Zg1axbnz5+nrKyMkJAQEhISWLZsGXPmzKGuro6BAweiUqmQy+XMnz+fzZs3ExQURHR0NObm5q1WrhUVFaHVaunVqxe5ubkAUm8ruVwuBWK+vr5ShZhOp8PY2BiZTMbevXuRyWR07dq11fu+cuUKJSUllJaWMmnSJGbMmMEnn3xCXV0dvXv35v333+eNN96goKCA69evM336dAA++ugjdu3axbZt28jKymLHjh18+OGHnDp1iujoaDp37kxDQwOLFy+ma9euzJgxg+TkZPr27cu0adPo1q0bFy9e5JVXXiE1NZU33niDffv2ERUVRUJCAvv27WP79u3Y2tqSnZ3NxIkTsbGxYf/+/axZs4b8/Hy++eYbvL29SUxMZMWKFSxduhQbGxt27NhBYWEh27Ztw97enkWLFuHq6sqQIUP48ccfcXFxaXW6JDROiezRoweenp688cYbnDt37qkVY0169OjR6rbHp4QqlUqCg4OZO3cumzdvJjk5mdLSUqKioqTVK/Wpqqp6argp/DmJQEwQBEEQBEEQBOEPqKGhgfz8/Fa3e3l5ERcXR0FBAT/99BMTJkzA3t6e+/fvP3XcH374gRs3bvDuu+8SEBDwm9eh0+k4ffo0o0ePxtvbm7Vr1/Lyyy9z5MgR7OzsmDZtGhs3buS1114jOzubdevW4e/vLx3f1GNMn8DAQA4fPkz79u3Zv38/GzduJ
Coqiu3bt9OvXz/c3Ny4ceMGY8aM4datW/Ts2ZPExES6dOnCBx98gFqtpkOHDgwaNIgrV67w97//nSVLlgDw8ssvk5WVhYWFBf7+/nh7exMfH0/v3r0pKSnhgw8+wNXVlcrKSul61Go1586do7S0FE9PT1xdXaXn2TTF8vFAzM/Pj4yMDDw9PXFycsLZ2RmdTseePXvQ6XS88MILZGRkNJuGp9VqOXnyJLdv38bJyQmtVsuxY8f49NNPee+995g/fz4VFRWEhoayaNEiPv/8c8rKyti1axcffPABcrmcQ4cOMW7cOLZt28abb76JTqcjNjaWYcOGsWTJEjZu3IihoSFZWVm0b9+eZcuWsXXrVi5duoSxsTGDBg0iMjKSjIwMZsyYQUJCAvPmzSMxMZGEhAQMDQ1JS0vDxMSEHj160NDQwOeff46ZmRmzZ8+mvLwce3t76Z7kcjknTpxg9OjRHDx4kOeff55Dhw5RUFDAd999h0ajYcaMGTg5OXHy5MlWPw/V1dWYmJiQnZ3NV199xYABA7CxsWHcuHHs3LmThw8f6j0uNDSUbt26tXi9V69eBAcHt3o+aAx7IyIicHNza3UfIyMjaUqs8NchAjFBEARBEARBEIQ/ILlcrrd3VxMrKyu6d+/eosrJ2Nj4qeO2bdv2d52/qqqK7777TgqcUlNT+fbbb9m9ezcpKSkMHz6cU6dOsWzZMu7du8dnn32Gs7NzszGio6MZPXo0V69ebTG+jY0Ne/bs4cUXX6R3794kJSURERHB0KFDef/99/n73//OuXPn8PDwICoqit69e1NZWcmECRMYMWIEcrkcT09PhgwZQmBgIK+++ipLly4FYOPGjezatQszMzNqa2v56quv+Mc//sGUKVPYvn078+fPx9vbm+XLl7Ny5UpeeOEF2rdvz4oVK4iOjqagoIBevXoBcP/+feRyufSc5XK5NMXQz88PlUrFvXv3CAsLw8jICIDs7Gxu3ryJTCZj8uTJfP3111y6dInS0lI2bNjA/fv3sbS0pLCwEDMzM9zc3Pjoo484evQoq1at4sUXX6SoqIhevXoxa9Ys1q9fT01NDZ9//jlfffUVxsbGpKamEhoayrBhw4iKisLMzAyVSsXEiRM5ffo0SUlJhIeHI5PJWLduHQ4ODlRUVHDp0iV27dpFYmIiHTp0YP78+ZSVlTFlyhSSkpLYsWMHFhYWaDQa7OzsuH37Nmq1mv79++Po6IharW4xxbLpuXh5eTFx4kRcXV1ZsGABeXl5zJ49mzNnznDv3j3++c9/kpiYyNmzZ1tMQfT29mbZsmWUlZVx/vx5/va3v+Hv709QUBAFBQW888472NvbExYWxldffUVaWlqzc48YMYJ58+YRERFBeHg406ZNo2/fvk/tWfa4p1XzBQcHi6b6f0HiHRUEQRAEQRAEQfgDkslkBAUFtbq9td5cv9Wzy8/P76nnTUtLY+HChbi7u/Pqq68SEBDAyZMn+fjjj9m8eTPh4eFkZWWxefNm0tPTWbRoERYWFs3GSE9PZ8CAAfTr14+MjAwqKioYOnQoPXv2JDg4GFtbWz766CPOnTvHnj172Lt3L/Hx8XTq1ImUlBROnTrFJ598gkKhYNWqVYwYMQJorPjRV8ljZ2fHwIEDpUbxc+bMQS6XU11dzT//+U82btyIiYkJSqUSuVzOe++9R3JyMikpKYSFhdGhQwfOnTsnnePGjRv07t0baFw98/HqoCcrxABp2mRWVhbQWE22d+9eAAYNGoSjoyPHjx8nJCSEiooKunbtKjX2r6ioIDc3F29vbyZMmEB2djbbt2/nmWeeITs7m379+jFu3Dh+/vlnNBoNixYtYsWKFTg7O1NdXc3gwYPJzMzk1q1b0jX//PPPjB49mpEjR/KPf/yD999/n5EjRzJ9+nSGDh3KkCFDuHPnDp9//jnbtm3D19eXZcuWoVarGTduHLdu3WLPnj1YW1tTW1uLi4sLCQkJ5OfnN6v+e5KVlRWJi
YnMmjWLnTt30qlTJ0JDQ9mwYQNWVlYsWrSIuLg41q1bh0wm48aNG5w5c4Z9+/YRExPDlStXkMlkhIWFMXbsWCZOnMiwYcN45plnmDp1KpMnT+bq1assXrwYPz8/aUXN6OhoNBoNDg4OmJubExcXxw8//MDy5cv54Ycfnlpp2aRr1656QzEfHx9pFVDhr0UEYoIgCIIgCIIgCH9QgwYNwtbWtsXrLi4uhIWF6T3G3t6ewYMHt3hdLpczcuRIvVO/tFotR48e5dlnn6Vdu3Zs2bKFuXPncufOHSIiIpg3bx4vvPAChoaGHD58mOvXr9OjRw/i4uJITEyUAqIHDx4wY8YM/Pz8OHfuHJ6enpw5c4Zz587Rq1cv/P39WbVqFa+//rrUaywyMpKFCxcSGRlJr169SEhI4JlnnkGj0fDaa6/x+uuvY2Jigr29PX379m31WSUlJVFeXs6QIUNoaGjA0tKSZ555hsDAQHbv3s2SJUv48ccf+fvf/46dnR2LFi0C4Msvv0Sr1bJ//37UajVBQUGo1WopXGqqEGuqaHo8EGvbti0KhUJqrF9fX4+bmxtarVYKxGQyGR9//DEAX3zxBUuWLOHQoUM8++yzmJqaUl9fj5eXF+np6RgZGTFmzBg0Gg1HjhyhS5cuJCYmMmTIEIYPH87JkyeRyWS88sor/P3vf5cCnHnz5vHxxx9z+PBhPv30U8zMzBg+fDjFxcXodDrkcjk+Pj5Mnz6dO3fu4OTkxMaNG1m8eDFpaWlMnTqV9957j4CAAHbs2IFMJmPMmDFcv36dAwcOYG9vz8OHD/Hy8vrNXlpNlV7379/nl19+wdbWltmzZ+Pi4sKsWbOIjY3F19eXDz/8kL179/LVV18xdOhQDh06RL9+/fD09OTtt9/m6NGjLVZ89PT05KOPPiI8PBylUkl6ejorVqygX79+2NnZ8corr/Dzzz9LCxwA5OTksHnz5mYrheojk8mIjIxk5syZhIWF0adPHyZNmsTEiROfWqkp/HnJdGKpBEEQBEEQBEEQhD+suro6rl27Rnp6OnK5HD8/P7p27fqbv6RnZ2cTHx9PRUUF9vb29OjRA0dHR6AxAEtISCA+Pp68vDzS09M5fvw4zs7OvPbaa0RERLB582a+/vprCgsLGT16NG+++SahoaEUFhayY8cOysvLpXOZm5tTX18vNYW3sLDgX//6FzNnzkQul9PQ0MCqVat49913sbe3Z926dYSHh5OSksK4ceNISkpi+fLlvPbaa8hkMioqKhg3bhzHjh1DqVTSoUMH3nojxDMhAABOQklEQVTrLeLi4jAzM2v1nmNjY/n1119xc3Ojurqa2NhYIiIi8PT0xMbGhtjYWFJSUmhoaMDa2hp3d3eysrKIi4uTmq8vXLiQtWvXUl5ejkKh4JlnnuHy5cvodDpUKhVWVlZMnTpVWpXQ39+f4cOHs3z5cmxsbOjSpQsXLlwAGlf2bNeuHRqNhjZt2lBQUEBUVBQzZ84kODiY8+fPY2xsjFqtplOnTmRmZlJYWMiUKVNYt24dDx8+pFu3bmRkZDB06FCOHDlCQkICffr0ob6+nhUrVhAXF8eOHTswMDCgb9++7Nmzh3379rVaFZWbm8v69eulaz98+DC+vr6kpKSwZMkSDh48SM+ePVmxYgV9+vQBGvvIHT16lI8//hgbGxtCQkL0jp2cnEx0dDSvvvoqs2fPlsLcnJwcfvjhBzZu3Mi9e/fo2LEjM2fOZNKkSdI+Wq2WixcvSp+t1prvy2Qy3njjDXQ6HcePH2f//v3s37+fiooKXn311Wa9zR4XHBzMyJEjm72m1WpJTk4mJycHpVJJ+/btn9pcX/hrEYGYIAiCIAiCIAjC/4/odDp+/PFHcnJymr0uk8kYNGgQ+/btY+3atajVaqZOncrixYulcKIp2Ho8DGtSVVXFt99+y9y5c/n444+l4CoxMZEZM2YQGxsr9fkyNzdn8+bNvPrqq7i5ubFz506p+Xl2djYjR
owgIyODuro6wsPDmT59OuPGjeOll156alDy4YcfYmtry4MHD9i5cye3b9/miy++YPPmzYwfP57NmzczZcoU3nrrLb744gs2bNjAjBkzePfdd1mxYgU2NjaEhoZSVlbGmTNnyMrK4sMPP0Qul1NSUkJcXBxqtZrx48ezevVqAGk65+HDhxk6dChqtZqzZ89iYGDA0qVLeeuttwBYs2YN8+fPp0+fPsyaNYuXX36ZRYsWUVpaipeXF9AY0OTm5rJlyxa+//57pk2bRn5+Pl26dKGoqIixY8eyY8cOUlNT6d69O1VVVVKj/X/84x8YGxvj6OjIvHnzqKura/UzUFtbyxdffIFOp0Mmk/H666+zfPlylEolZ8+eZfHixcTHx/PCCy+wbNkyfHx8pM9OVFQU58+fx8TEpNmY9fX1GBsbk5WVxfbt2zEwMGDq1KksWLBAmmbZ0NDAiRMnWL9+PQcPHsTAwIAxY8Ywc+ZM+vfvL/X7Wr9+PXl5ea1e/5w5c5r1q2toaODs2bNSEKmPVqtl6NChhIaGYmBgQG1tLVu3bm1xntDQUIYOHfq7e48Jf14iEBMEQRAEQRAEQfj/gYaGBo4cOcLWrVtb7U1WWVnJpk2beOWVV3j99ddbNMm/c+cOu3fvbvUcYWFhUr8llUrF559/zqeffoqPjw8bNmygT58+VFRU8Morr7Bjxw6mT5/O119/jbm5OQBXrlxh1KhRqFQqysrKmD59OhEREYwbNw6tVssnn3yCVqtt0ZAdYM+ePaSkpGBkZMTo0aN555136Ny5M2+//TanT5+mqqqKa9euIZPJsLGxQaPRSH28goKCyMvL47nnniMqKooZM2YQERHB6dOnWzzD48eP06VLF77//nugsaLs119/JSUlhU8++YQVK1ag0Wj4/9q7z7CqrrXt+/+16CAg9oJiARUVwYK9d02Cxt6VmNhj3DExidFsUy2RRJNYojHqjhqNxo69xBKwKyI2VIwFO0gRlLbeD8jaImth7vt9nv3shPN3HDkic4w55lwLPp3HNa6RlpZGQEAAhw8fBrJDqFKlSpkb27/55pu0bt3aYsXb3bt3+fHHHwkPDycgIMAcgCUmJjJ8+HDmzZtHbGws/v7+PHjwgJEjR9K0aVOCg4Oxs7NjxIgRefq65TCZTIwaNYp79+7Rs2dPzp07B2T3Yfv1119p0aIFWVlZLF++nIkTJ3Lnzh3GjBnD5MmT8fDwALJPg1y5ciUXL17kyZMn3Lt3j2vXrnHw4EHKli3LG2+8QXp6Oj/88AN37tyhX79+1K9fH6PRiIeHB4GBgRQuXJilS5fyww8/EB0djbe3N6+//jpDhgzh4MGDnDlzxuL7G41G3nnnnTyB3OPHj5k+fbrlP0zg/v37fPfddxQrVozOnTtTu3Zti8EuQI8ePfLt3yd/D+ohJiIiIiIi8hd2//59zp07R2xsrMWgKC4ujpkzZ+Lj40OXLl1wc3OzuparqysHDx60eGJkzlp/xtGjR6lbty6ffPIJEyZM4OTJkzRp0oQjR45Qu3ZtNm/ezIoVK1i0aJE5DPvll19o2bIlAPHx8UyZMoXWrVvTu3dvsrKymD59OpMmTaJ3794ULlzY/CwnJyd27txJVFQUFSpUoFixYnz77beMHDkSLy8vatasye+//86MGTOwsbFh9erVJCYmMnjwYGxsbLh48SJnz541b0+8c+cOAQEBecIwyG6W37p1a3MPMQBvb29iYmLIyMigadOmPHz4EH9/f9LS0jhy5AjXr183v+f48eMxGAxMmTKFd955x+r2zxIlShAQEED37t2Jj4/Hx8eHvXv34uDgwPfff8+kSZMoW7YsFy5cwNPTk3nz5rF+/Xp2796Nk5MTV65csfq7uXbtGgEBAcTGxhIZGUlISAi2trY8ePCAli1bEhQURFJSEgMHDuTChQv885//ZMGCBVSuXJlZs2aRlpaGi4sLQ4cOZfr06fTq1Yu4uDgOHjxIxYoVqVSpEp9++
ilz587ltddeY/r06VSpUoWHDx8SFxfH5cuXWblypXmL5oULF/jtt99o2LAhU6ZMwdPTk/Xr11t9/5o1a+YJwyD7dNVKlSpZva9bt278/vvvvP7660REROT7t3zy5EmrY/L3oUBMRERERETkLyglJYXly5czZ84cfvnlFxYuXMiCBQu4f/8+AKdPn2bYsGF4enry4Ycf0rRpU44cOUKnTp3yXddab7KEhARCQ0PzvdfFxYV33nmHhg0bYm9vz7Fjx/jss8+wt7dnxowZNGnSBB8fH/71r39Rrlw5c9P3zz//nN69e1O0aFHu3bvHDz/8QPny5RkwYAAmk4mQkBAmTJgAZPe9evPNNxk+fDhDhw5l48aN/P7779SsWZPLly/z008/sXHjRvbt28c333zD5MmTad++Pe3btwfgo48+wmAw8OmnnwKwYcMG8+mTOQcOODs7W/2MDg4OuQ4m8PHxIT09nWvXrtGgQQNsbW3x9fUFsquZng13xowZg52dHTt27CA9PT3f79Ld3Z34+HgGDRpEVlYWderUYevWrdjY2PDFF18QEhJC0aJFOXfuHFWqVGH16tVMmTKFsLAw/vjjD5KTk/OsaW9vzz/+8Q9q1qxJ+/bteeeddxg1ahSXLl0yH1iwadMmc9N9JycnPvzwQy5dukSPHj0YP348NWrUYN26debwtXHjxmzdupUjR45Qs2ZNDhw4gKenJ3Xr1mXx4sU8evTI4ufbs2cP0dHRGAwGWrRowU8//URsbCxff/01p0+fJjQ0lMzMzFz3VKhQId+/3w4dOuDo6JjneqlSpWjcuDGNGzdm6tSp7NmzBxsbG6vrJCYmWh2Tvw8FYiIiIiIiIn8xJpOJ1atXc+nSpVzXb9++zYIFC2jVqhX+/v6EhoYyceJErl+/zr/+9S8CAwMpX7681XVtbGwoW7ZsrmsZGRnMnTsXb29vvv32W4tVaAC2trYMGjSIOXPmMHXqVA4fPkxAQAC3b9+mU6dOfPjhh3zwwQfmkyR3797N3Llzefvtt80VT3FxcWzatIm0tDRee+01AGbNmmU+ETKH0WikVKlSzJ07l99//51SpUpx7tw53n//fWrUqMH48ePp27cvV69eJTo62ryVLioqiosXL9KsWTOKFi0KwPr16ylWrBgNGzYkIiKCqlWr5huWPM/HxweA6OhonJ2dqVu3LklJSRiNRpydnc2nTQIUKVKE4cOHYzQaCQ8Pz3fdW7du0bBhQzZv3szUqVMBaNWqlXnL6jvvvGOusIuIiKBu3brs3r2bvn37smXLFs6fP8/JkydJSUkhLS0NLy8vhg4dip+fH9u2bePrr79m7ty51K9fn8TERPbv389PP/1kPvny9ddfJyAggKtXr2Iymejbty/Lli2jbt26dO/enRYtWnDs2DHz+wYGBrJx40ZOnjxJvXr12LVrFwEBARiNlmMHg8FAcHAwwcHBREREAODh4cGYMWM4deoUc+bM4e7du+zatYtdu3YRFRWFi4uL1fUgu7Ju+PDhBAYGUrx4cUqXLk2rVq0IDg7GwcHBPM/Nzc3iaas5ihcvnu/vRv4eFIiJiIiIiIj8xcTGxnL16lWLY+np6ZQoUYJffvmFq1evMmnSJPPpkgD+/v7mk/2e17BhQ3N1lMlkYvPmzfj5+TFmzBhefvllzp8/z7Bhw8zbHHNkZmayYMECSpUqRUREBBMmTMDW1pbt27fj7+9PREQE8+bNsxg0FS5cmGbNmpGens7+/fu5cOECo0aNAuC7777jrbfesviu27dvZ+rUqdjZ2eHg4EBAQABTpkzh/fffJz09nU8++YQpU6YwcOBAAgICgOwQCWDmzJlAdoAYFhbGw4cP6dixI+Hh4TRq1ChPKPi8Z6uvypcvj729PdHR0QA0bdqUsLAwvL29efToEfv37+fevXvm+TnvsGbNGqvrp6SkEBgYyNatW+nZsyeTJ09m586dQPbWv5z+ZW+88QZr1qzB0dGRQ4cO0bp1a44fP
07z5s1ZunQpZcqUYcaMGcyZM4d3332X8+fPA9mB4rhx4zh69Cgmk4nAwEC++eYb+vXrx9WrV+nevTuQXWU4atQoFixYwP79+7l48SK+vr7MnDmTpKQkAgMDGTBgQK4DGgICAlizZg2RkZH5bmEE6Ny5M7t37yYgIIDWrVuzadMmsrKyMBgMBAYGMnfuXGbPnk1QUBCFCxfm448/pnz58kyYMIGLFy9aXLNw4cJ07tyZUaNGMWzYMJo3b54n/LK1taVevXpW3yvnxFH5e1MgJiIiIiIi8hdz+/btfMeHDBlCz549LW5/tLe3Z/DgwVSrVs18kp6TkxOtWrWiTZs2AJw6dYq2bdvyyiuvUKZMGU6cOMHixYspW7YsZcqU4c0336Rr166UKlWKffv2MWfOHD744AP27t1LlSpVSEtL491336Vjx47Url2bkydP8uDBA6vvW6tWLcLDw9m+fbu5Gmz+/PmMHj06z9xr166xaNEi9uzZw5tvvkn//v2Jj49n2bJlHDt2jIULF/LFF1/w008/kZCQYN4amZCQwM6dOylfvjyBgYFA9vZAg8FAamoqLVq0ICIigsaNG+Pr60uxYsUsvuu1a9dyBWI2NjZUqlTJXK3XtGlTrl+/Tps2bTCZTJhMJjZu3GieX758efr06UNcXJzF78RoNBIaGsry5ct55ZVX2LlzJ82aNaNfv37mfmTDhw/nk08+wWQy0adPH7Zv346trS07d+6ke/fuXLx4kVq1ajFlyhRmzZpl3gLYpk0bvv/++1zf+9GjRxkxYgTjxo2jU6dOZGRksGbNGkJDQ2ncuDENGjTI845JSUl8/PHHLFiwgF27dlG1alUmTpyYa6thzZo1ef/99y1+hzkGDRrElStXWLVqFampqQQFBVG1alW+++474uPjWb58OZs2bSIlJYWyZcvSo0cP3njjDZYuXUrVqlVp0aIFy5YtIzU1Nd/nWNK6dWtzUJrDzs6OoKAgvLy8/sfryV+PAjEREREREZG/GGvN2P/suJubG71792bChAmMHTuW8ePH07x5c2JjYwkODqZOnTrcunWLzZs3m7e+Pevhw4dMnTqVESNG4OTkxKlTpxgxYgRGo5FLly7RpEkTZs+ezcyZM9myZQuFCxfm8ePHVt/Hy8uLxYsXM2nSJAAWLlzI8OHD88y7cOECS5Ys4caNGzg7O1O0aFEqVKjAhAkT8PLyYsSIEQQGBhIUFMTMmTMZN26ceYvoZ599RmZmJh9++KF5vfXr11OuXDmKFStGWloaWVlZNG7cGFtbWwYOHJgraMnMzCQyMpLw8PBcTfUhu7F+ToVYkyZNgH9vpSxSpEiubZMA77//PpmZmcydO5dOnTphY2PD5cuXqVq1KqNGjWL8+PE8fvyYuLg4ChUqRHp6Os7OzvTo0YMnT54AMGnSJEaPHk1mZiZdunQhLCwMo9HI6tWrGTZsGDdv3sTX15euXbuyadMmUlNT8fDwYMSIEYwaNcrcw8zR0ZFZs2axfft2Tp8+jZ+fH+vXr6dz584EBwdb/Z1FRkYSHBxMdHQ077zzDrNmzcLHx4f58+ebvx9PT0+qVKli8X6j0Yibmxu2trb06tWL8PBwwsPDqVOnDuPGjaNv374WDwewtbVlyZIlrFixAhsbGwYOHGgOaXO2XuaIi4tjw4YNzJo1i2+++Ybt27ebe5rZ2NjQpUsXxowZQ1BQEN27d+ftt9+mdu3aVj+z/L0oEBMREREREfmL8fb2thp6GY1G/Pz8/tQ6jo6OeHh4kJqaykcffYSPjw+hoaHMmTOH48eP07p161zzTSYTy5Ytw9fXl127drF8+XI2b95MuXLlAFi+fDm1a9fm4cOHhIWFMX78eIxGI46OjhZPBsyRmJjIZ599BsDixYt5/fXX88zJyspi69atFnuYZWRkMGvWLKKiopg/fz6ffPIJjo6O5gqlzMxMvv/+e5ycnMy9yZKSkti1axeZmZl06NCBw4cP4+7ubm6I7+bmx
qlTp4iKiqJSpUqEhISYn/98IObj42MOxIoXL061atW4ePEi7u7uPHr0iF27dpGQkGCe7+fnZ27yv3jxYt555x327dvHypUrKVq0KMHBwTRq1Ijff/+d4OBgDh8+TOfOnTEajUyePJljx47x5MkTvvnmG3r27ElaWhrt27cnIiICg8HA999/z8SJE4mLi6NGjRp4eXmxdu1a2rRpQ+/evTl+/DgdO3Y0H8AA0L59eyIjI2nWrBmvvvoqw4YNs9iY/9nvPCUlBVdXVz799FMuXrxIx44dGTlyJP7+/mzZsgWAHj16UL9+fXO1oq2tLY8ePWLq1KlUrFiRTz/9lIcPHwLZW3ZXrVrFlStXLFam5Thz5gx9+/Y1N+YfMWIEq1evJiAggPr167NgwQKuXr3KwoULOXXqFAkJCcTHx3Po0CEWLVqUq9F/0aJFqV27NjVr1rTYkF/+vhSIiYiIiIiI/MXY2trSvXv3PL2RDAYDL730Eh4eHn9qnczMTH744Qd8fHyYMWMGb731FseOHaNo0aJ8+eWX5v5TkZGRXLt2jZdeeomBAwfSoUMHzp07R79+/TAYDCQnJzN48GAGDBhA165dOXHiRK4eTTk9oaxZt24dAEuXLmXIkCEW59y5cydXqPS8c+fOMXbsWBwdHVm0aBGTJ0+mcOHCAKxcuZKkpCQGDRqEra0tANu2bSMtLY2bN2/SqVMnwsLCaNiwoblpe0ZGBvfu3aNkyZJkZWWRkpKCyWTC1tY2zwmRPj4+xMTEmIOyZs2acfDgQZo2bcrjx49JS0szB0Q5Jk6cSGZmJosWLSI+Pp6pU6eyYcMG9u3bB0BoaCgODg58+eWXTJgwAXd3dzp27IiLiwuhoaF888033Lhxg2XLltG6dWtSU1Np3bq1ubfW559/TkhICCkpKXz88ceEh4dTrVo1fH196dy5MzVr1qR58+acPn3a/E7FihVj7dq1LFy4kOXLl3P27Fmr33dqaipt27Y19w/z9PRk6dKlHD9+nBIlSvDSSy/Rvn17zp49S6dOnZgwYQLjxo3jvffeY8aMGVy8eJH+/fvz+eefU6FCBT766CPi4uKA7G2lzzbBf15KSgpZWVlAdjg8depUrl+/zrp16yhRogQjR47kn//8p8WqxPj4eA4cOGB1bSk4FIiJiIiIiIj8BVWsWJHRo0fTsmVLatasSaNGjRg5ciR16tQxz0lLS+POnTu5KmJy7Nixg9q1a/PGG2/QunVrLly4wOTJk/n11185d+6cuRLrwYMHrF27lt69e3P69Gk2btzIihUrzCfxnThxgjp16vDrr7/yr3/9i59++glXV9c8z2vUqFGeiiOTycRvv/3G2bNnWbZsGYMGDbL6eS1tn3uWvb09n3zyCe+//z5eXl6MHDnSPPbxxx9jMBj45JNPzNfWr19P+fLlMRgMtG3blvDwcBo3bsyTJ0/YvHkz06dPJygoiGLFinHnzh0MBgMGgwEbGxuLFWIZGRnmgw6aNm3KmTNn6NWrFwClSpXi119/zXVP8+bNqVu3LllZWYSEhNCnTx8CAwMZP348WVlZeHh4MH/+fEwmEzY2Nnkq7FJTU1m1ahUAGzZsICAggISEBFq1amXuNfb222/zzTffULNmzTzfV5EiRahfvz6NGzc2B5KQHV6+/vrrnDx5khs3blj9vm/cuMHRo0epVKkSU6ZMISMjg9OnT3PkyBFatmzJZ599hoeHB/Xr12fo0KHcvXsXd3d3cyDp6enJN998Q0xMDEOHDiUkJAQvLy8++OADcxBpTYkSJfKcNmlnZ0fXrl3ZvHkzly9fzreh/4ULF6yOScGhQExEREREROQvys3NjRYtWtC9e3fat29vDqkyMjLYtm0bX375JfPnz2fmzJnmKqmoqCg6depEhw4dcHd35/DhwyxfvhwvLy+OHDmSqzH6s9q1a8fp06d55ZVXgOww6+uvv6Zhw4a4urpy8uRJBg4caPHepKQkunbtyuzZs/H09KRdu
3bcuXOH2bNns2/fPlasWEH//v2tfs5Hjx4xZMgQi8FejsDAQE6cOMGmTZv44osvzBVGp0+fJjo6miZNmphP20xLSyM0NBRXV1fq1q1LfHw88fHxNGzYkOXLl3P8+PFcoVdqaiqdO3cmKyvLYiDm7e0NkKuxPkChQoUwGo2YTCauXLnC7t27iYiIIC0tDYPBYK4Smzt3LnFxcXz11VccP36cFStWANmHI7z88ssWD0eA7Eqp8+fP4+Liwo4dO6hYsSJ37tyhVatW3L17F8BiOJnDy8uLsmXL0q1bNz755BNz1RVAlSpV2LBhA5mZmXk+b0BAAEuWLGHp0qU4ODjw8ccfExQUxLp167hz5w6Q/TdYo0YNPvroIzZu3IiPjw8ff/xxnt9h6dKlCQkJISYmhtGjR/Pdd99RoUIF8xZUSxo3bmx1DKBMmTL5jmdmZuY7LgWDAjEREREREZG/mdDQUA4fPpwryLhw4QLTp0+ndu3aXLp0iV9//ZX9+/dTv35985yYmBira9rY2Ji3Ct69e5eXX36Zt99+mzfffJOwsDB8fHxISUlh69athISEMHXqVJYvX86pU6do2bIl4eHhbN26lYEDB/LVV18xf/58EhISWLVqFX369LH6XJPJxNChQ7l8+TKRkZEW56SmpjJw4EDeffddAgMDzZVZABMmTABgxowZ5mv79u0jISGB69evm7dLGgwGihUrZq6uel69evXMTeCfD4jKlSuHvb29OcSpWLEiZcqU4ciRI9SvX5++ffvSpUsXDh48yPr165k9ezbXrl2jS5cuVK5cmbS0NGbPnk3Tpk3p1q0bEydONDf0Hzp0qNXvBjBvIy1atCh79uyhePHi/PHHH7Rr146EhIR8Q0Sj0UhcXByVKlXin//8J7179841387Ojk8++YSWLVty4MABdu7ciZeXF126dMHW1pZBgwYRGxtLjx49rDajz8jIIDQ0lFGjRvHFF19QpUoVFi9enCeUKlGiBNOmTePq1au8/fbbzJs3j+3bt+cK6ezs7GjXrh3+/v75fif29vaULVvW6nh+1WNScCgQExERERER+RuJi4vj1KlTFsfs7Oz4/PPPiYqKolu3bhgMhlzjNjY2+a5ta2vL7t278ff35+jRo4SGhhISEoKDgwOPHz/mxx9/5MiRIyQnJ5OWlsalS5dYt24dNjY2HDhwgCZNmtCtWzfWrVuHwWBg9erV9OzZM99nfv/996xatQp7e3tCQkLo3r27udLLZDJx+vRpBg0axKZNmzh69CgzZswwb6eLi4tj586deHp60qhRI/Oa69evp1SpUiQmJtKxY0fCwsLw8/MzVzdZYjAYKFeuXK5g8NnvrXLlyuZAzGAw0LRpUw4ePEiHDh3MvcxypKSksHLlStLT03nvvffIzMxk9uzZJCQkMH36dG7dusXXX38NZG8tzM/BgwfN/y5Xrhx79uzB2dmZqKgoOnfuTLFixaze6+TkxC+//MLDhw8pU6YMW7ZsoUmTJvzxxx+55rVp04Zff/2VChUqEBwcTP/+/c2N8N3d3fnss8/y9LN71q1bt/jyyy85f/48TZs25bXXXqNevXrs2bPHPMdkMnHjxg0uXLhAUFCQuZJx/vz5rFy5kvj4eHr27PnC6rBn3/n5v28ABwcHcwWfFGwKxERERERERP5G8uv7BODr62s1vKhWrZrV+4oWLUpISAjt2rWjRo0aRERE0LlzZ/P44cOHefDgQZ77jEYj/fv3p3LlygQFBbF161YMBgO//vor3bt3z/ddjx07xpgxYwBYsGAB1atXp2bNmowcOZJXXnmFzz//nHr16lG5cmU++OADXn75ZSpWrMjy5cuZPn06s2fPplWrVubTJiH7tMoNGzbg5eVF4cKFqV+/PuHh4TRq1Mjc38qa9PR07Ozs8lSIQe6TJiF722R8fLzFUAayq9qioqIYOHAgxYsX59GjR8ydOxdvb29Gjx7N1KlTuXPnDtWqVcPNzc3iGiaTiY8++
ojffvvNfM3X15cdO3Zga2vL4cOH+eWXX/L023r2nVu1asWhQ4coVKgQdnZ23L17l8DAwDyN593d3Vm2bJn5ZFF/f3/2798PYPUzPq9ixYqsWrWKsLAwHB0dadOmDa+88gqRkZGsXLmSRYsWsXPnTrZs2cLixYtp164d0dHRDB48mOXLl1O9enVzteCznjx5wqlTp/j999+5dOkSWVlZVKxYkQEDBuTaPlmpUiWCg4PzDQml4FAgJiIiIiIi8jeSX6XOi8br1KljsSLJaDSydetWZsyYwRdffMGOHTsoXbp0rjk5/bMsefjwId27d2fPnj0YjUbWrVtH165d833PuLg4goKCMJlMDB48OFfD/aysLEaPHk2VKlUYP3488+fP548//uCtt95i6dKlXLp0icePH2M0GmnWrFmukyGPHz/OzZs3SUxMpH379iQlJXH27FkaN26cbyCYnp7O5cuXLW6ZhOw+Ys9+B82aNbMaZOWIj4/H0dGRt99+G4CZM2fy6NEjPvroI2xtbZkyZQp2dnb069cvz1oeHh4MHz6c5s2b07t3b27evGkea9CgARs2bMBkMrFu3TquXr2Ki4uLedxoNHLu3DlGjBhBREQEPj4+hIeHU7t2be7du0exYsVo3bo1CxcuzPPO/fr14/Tp03h5edGyZUs+/PBDypQpY7XPGZCnQX6jRo0ICwtj1apVREVFMXHiRPPpmDmysrLYvn079+/f5/333+fq1atMmzaN0NBQqlatyqBBg7hw4QLR0dF8/fXXbNiwgV27drF8+XIWLFhAQkIClSpV4o033uD999/ngw8+YODAgfk265eCRYGYiIiIiIjI30jlypVxdHS0Ol6jRg2rY7a2tgwcOJBWrVpRokQJ3N3dcXNzY+nSpZw9e5aDBw/y/vvvW6w4yjmV0prjx49jMBjYsGGDuTG/pTWSk5N58uQJffv25e7du3h7ezN37txc83744QcOHTrEvHnzSElJ4ZNPPmHo0KFERUVZfI/bt29z8uRJIHu7pIeHB+fPn6djx44cOnQIyG7UXq5cuVyndD5r+/btPHr0yOKWSciutoqJiTGP+fn58fjx43y/Ew8PDwBGjBiBk5MT8fHxLFy4kCJFijB58mQWLFjA2bNnKVmyJGPHjqV48eJs3bqVkydPMmbMGEqXLs3PP/+MnZ0dvXr1Ii0tzbx2hw4dWLZsGVlZWSxevJiLFy8yYMAA+vTpw9tvv823336Lp6cnLVq0IDQ0lE2bNtGqVSsmTpxItWrVaNCgAcOGDWPMmDF5Pq+Xlxd79+7ls88+Y8aMGbRq1YpatWpZ/IzXrl3jlVdeYenSpbl+NwaDgV69ehEREUG9evWsfkdHjx4FwMXFhfHjxxMTE8PXX3/Nnj17qF+/PsuWLePJkye57rlz5w5r1qwxP8/BweGFQbEUPArERERERERE/kbs7Ox45ZVXLG5ja9iwYb7NxiG7gqx58+YMGjSIqKgo3n77berVq8epU6do2LCh1fsqV65sdezBgwckJCSwadOmXNssc5hMJg4fPsysWbMICQlh2rRpuLq64u7uzvr163F2djbPvXv3Lu+99x7BwcE0b96cadOm8fjxY8aOHWvua2VJTgXS+vXr8fPzw2Qy0aFDB8LDwylWrJj5/V9++WWCgoIoV64c7u7ueHt7c/jwYY4dOwaQ75bJzMxMrl69CmT3FStVqpTVUMzJyckcThYuXJiRI0dia2vLjBkzePLkCaNHj6ZChQq8++675vVGjRqF0Whkw4YNLFu2DMhuRr969WqOHj1qPkAgR9++fZk9ezYmk4mFCxfy/fffU6VKFVxcXChevDh79uyhZcuWHD58mKtXr2IymTAajfj7+1O/fn0aNWrE999/T4cOHbh//36utW1sbJg4cSLh4eEkJibSu3dvPDw8KFOmDDY2Nri7u9OiRQs+/fRTOnTowJAhQ2jbtm2eSsJnQzxL4uLi8nxvb775JpcvX2by5MlWt4Peu
HGD27dv57u2FGwKxERERERERP5mqlevzhtvvEFAQABlypShatWq9OnTh/bt2+eal5mZyblz59i9ezdhYWHmEwtzqnZWrFjBokWL+Pnnn3F3d8/3mQ0aNMjTPB6yt77t3r2bzZs307FjR4v3HjhwgG3btpGYmGi+5ufnx/jx4/MEbe+88w5Go5EZM2Zw7do1Zs2axfjx4ylatGi+72cymbh48SJnz541hz5lypQhLCyMxo0bmwNEg8FA7dq1ee211xg3bhz9+/c3h1wODg4UL14cb29vYmNjc1U8+fj4AOTpI/bLL7/g6uqa612cnZ3p06dPrkq+cePGYTKZuH37NkuXLsXBwYFp06axZcsW1q9fz9GjRzlz5gxr167FwcGBYcOGmcOiRo0a8dVXXzF79mxWrlyZ61ljx45l0qRJAHz55ZdMmzbNPObm5kaHDh0sHqbg7u5O4cKF8fb2JiIigvr161s85bNevXqcOHGCAQMG8NZbbxEaGsqIESMYN24cLVu2pEyZMixbtoxt27YRExODn58fU6dONVedFSpUKN/DHCz9TUH27yK/EBbyhmkizzKYXlTXKiIiIiIiIn87SUlJLFu2jLt375qvGY1GnJycmDRpElWrVmXlypX4+vr+j9bcs2cPZ86cIT09nRs3bnDw4EHmzJlDmzZtLN7z+PFjvvrqK4vbEAE6depE/fr1Adi7dy+tW7dm0aJFvPbaawwZMoStW7dy6dIlnJ2dmTVrFsnJyRbXadu2LWFhYXz00Ue4urry2muv8dlnn+Hh4cGkSZN47733rH6u8uXLU6hQIbp06ZIrxKpcuTI9evTA0dGRrKwsnJ2dmTFjBmPHjgVg3759tGzZkuPHj/Phhx+SkpLCtGnTqFu3rsUtfK+99ho///wzpUqVIjo6mvT0dCZMmJCrCbyDgwOurq6MGTOGJk2amE+ZNJlMDBgwgA0bNnDkyBGqV69uvsdkMjFixAgWLlyIyWRi7ty5jBw5kuTkZEJCQqx+bldXV2bOnInBYKBQoULcvHmTZcuW4evraz5Ewd3dnbp161KrVi02btzI0KFDsbOzY8mSJXTo0CHXeikpKXz88ceEhITg6+vLwoULadiwIRs3bjRvaX1ev379zGHj8/bu3Wtu7G/J66+//sKKSCm4VCEmIiIiIiJSAK1fvz5XGAbZ1VyPHj1i5MiRHD58+IVhWGZmJkeOHGHRokV899137Ny5k/Lly/Pjjz8ybdo0li9fzrx586yGYQCxsbFWwzDAXJ315MkTRo4cSdOmTRkyZAgRERH861//YsqUKbi6umJjY0Pbtm0trlGkSBHq1avH+vXradiwIffu3aNjx46cOXOG5ORkGjVqlO/ndHBwMAdfz7p8+TKhoaFAdphYuXLlXBVi9evXx87OznyK5f79+7l48aLVflbvvvsujx8/5urVq/z888+EhobmORHxyZMnxMXF0a5dO37//XeWLFkCZFe2LViwgAoVKtC9e3eSkpLM9xgMBubOnUvXrl2xsbFh9OjRLF++PN/PDNmVbEeOHKFEiRLcvHmTgIAAZsyYwcqVK4mJiSExMZHr16+zfv16tm3bRpcuXYiMjMTf35+OHTvy1ltv5doy6uzszPTp0zl27BiOjo40btyYMWPG0LhxYypWrJjr2QaDgdatW1sNwwBq165ttbqsdOnSuU6YFHmeAjEREREREZECJj4+nitXrlgdb926db6N+SE7PFu1ahVbt27lxo0bPHjwgMjISNatW4eDgwNGo5Ft27bRqlWrfNfJb7scZDf6B5gxYwaXL19m/vz5GI1GJkyYgI+PD6+//rp5rr+/P82aNePWrVtAdr+vgIAAgoODiY+PJzw8nCJFiuDq6krjxo0JDw/H1tY236buAL6+vlbfMyoqyhw++fj45ArEnJycqFevHgcOHKB///4ALF26NN/nBAUF4eLiwldffUVUVJTFeVlZWYwcORIHBwdGjBjBgwcPgOzG87/++is3b
95k6NChpKenExMTw5UrV8jMzGTFihXmUzcHDx7M3r1785wW+ixvb288PT05cOAArVu3JiIignbt2lmce+TIEW7dukXp0qXZsmULs2fP5vvvvycwMJDIyEgePHjA+vXrmTVrFgcPHuSLL74gJCSEJUuWEBAQQKFChRg0aBAtW7akXbt2vPnmmzRr1szqu0H2dsru3bvnOeGyaNGi9OzZ02IfPZEctv+vX0BERERERET+s3J6hf1vxyE7CHo2/MlhY2NDhw4d6Nat2wsDDQBPT09sbW0tNqoHqFatGpcuXeLzzz/nnXfeoUaNGuzYsYMdO3awdu3aPGHIxo0b+fXXX4mJicHFxcUcivz8888YDAZiY2Np06YN9vb2hIWFUbt27VxN+5+XmZlptY8VZG9HjIuLw9XVFR8fH9auXZtrvGnTpixfvpxKlSrh4eFBeHg4WVlZVpvBv/fee2zcuJH4+HirzwR49OgR8+bN47XXXiMoKIjff/8dgKpVq7J48WI+/fRTpk6dau5zZm9vT6tWrdiwYQMtWrTg3Llz9OjRg1WrVnH37l0yMzNzrV+4cGEaN24MZPf5WrduHR9++KHV9wa4cOECpUuXxmg0MnbsWFq3bk2/fv146aWXGDp0aK65YWFhFCtWjJMnT/L222/TvXt3unbtaj79MkdCQgIpKSkULVrUYmWdr68vXl5eREVFkZycTMmSJalateoLg1YRVYiJiIiIiIgUMEWKFPn/NQ5w/vx5q2OFCxd+YcPzHFeuXGHt2rVYam9dpUoVqlWrxujRoyldujSTJ08mKyuLCRMm0KRJE7p27ZprfnJyMosXL+aNN96gUKFCJCUlsX37dhYsWMCFCxfo06cPJ0+eNDf3DwsLe+F2yUePHr0wIMw5cMDHx4erV6/mOjmxbt26+Pn5sWzZMnr16oXBYDCfWGlJ48aNadKkicXv41lubm4EBwfTqFEjwsLCWLx4sXnM39+fV199NdcaaWlpbN++natXr7Jt2zY8PT2xsbFhwIABNGjQAB8fH+zs7HB2dqZu3boMHTo0V1BoY2NDjx498n2n50PNmjVrcuTIEQYPHmxx/v3797lx4wYbN25k9erVHDp0iOrVqzNnzhzu3r3L4sWLmTVrFgsWLCAkJIQ9e/aQlZWVZx1nZ2cCAwNp1aoV1atXVxgmf4oCMRERERERkQLGzc2NatWqWRwzGo0v3EII5Nv368+MQ3aT9R49epCcnEyfPn3w9fXF3d2d0qVL07FjR3r16sXq1avZsWMH3333Hc7OzixbtoyIiAi+/PLLPFvifvrpJ5KTkxkxYgT379/n+++/59ChQ9y6dQtXV1eqVKnCwIEDadOmDXfu3OHKlSvmKihrkpOTOXHihNXxypUrmyvIvL29ycrKMvc9O3jwIOfPn6dRo0ZcuXKF0qVLM2bMGJYtW5bvM9977z2uXbuW77bWOnXqALBp0ybz1sn79+8DmKvFLDl48CAlS5Zk586duLq6YjKZ6N27NwEBAUycOJF3332Xl19+mUKFCuW5t0KFCvm+9/N9wHLkbHu15Pz58xgMBnr06MG5c+fo378/7777Ll9//TXXrl0zz0tLS+PAgQPs2bMn33cQ+bMUiImIiIiIiBRAQUFBeHl55bpmb29Pt27dKFGixAvvf/7e59fJrzcVZG81HD16NNHR0axZs4Zq1arRq1cvxo0bx7Bhw2jQoAHJycmMGzeObt260bJlSx49esSkSZPo3r17nsouk8nEd999R9euXSlfvjzbtm0jJSUlz3M9PT25desW4eHhAC8MxJKSkrhx4wZbt27NU51UvHhxgoKCzD/nNICPjo4mNjaW3bt351nPxcUFo9GYbwXYSy+9hK+vL3v37s1zaqbBYKBDhw7mbYVFixZl3rx5pKWlmd8lp4eaJffu3SMjI4OKFSuyc+dObG1tSUtLo23btsTExOT7XRQrVoyAgACLYxUrVqRSpUp5rj+/FTO/8cKFCzNv3jwWL
FhgtYfd4cOHSU1NzXdNkT9DPcREREREREQKICcnJwYPHsz169eJjY3F0dGRatWqvbCZfo66dety5MgREhMT84w1a9bM6kmKOX788UeWLFnC0qVL8fPzszhn0qRJ1K1bl7p16zJz5kwgexvlp59+mmfunj17OHv2LHPmzOHx48dcvnzZ6rPPnj3L5cuX8fT0pFy5cvm+Z04gdfz4cS5evEhISAhly5albNmyVKtWLdf2vLJly+Lo6Eh0dHS+Dd09PDyYP38+NjY2ZGRk4ODggIeHBwEBAVSsWNF8aEBwcDBff/01y5cvp3Dhwjg5OVGzZs08W1qDg4NZuHAh4eHhLFq0CGdnZ548eWLx2Q4ODuZ3rlWrFps3b6Zdu3YkJibStm1bDh48mG+Y+corr+Du7s6RI0dITU0lIyODkydP4uPjY/EzOzs7U7x4ce7du2dxPUtVZ/lteczIyOD27dtWq9FE/iwFYiIiIiIiIgWUwWCgfPnylC9fPt95JpOJS5cucfLkSZKTkylevDi+vr6sXbuWihUrmvuFFSpUiKZNm1K/fv08azx48ID4+Hg8PDy4du0ao0ePZtiwYQwaNMjiM48ePcqDBw8IDAzMtf2yWbNmnDt3Dl9f31zzv/32W2rWrEmLFi3yVFU9Ly0tjfDw8Bf2D7t//z6RkZF07NiRBw8ecPLkSSpUqGD15Eyj0Yi3tzfR0dGULVs237Xv3r2b6+cbN24QGRlJ0aJFqVevHj179uTDDz/k8ePHLFy4kM2bN5OVlWX1AICNGzfi6enJqFGjWLduHUePHrU4r1atWrmCq2bNmrF69Wq6du3KnTt3aN++Pfv27SM9PZ2bN2/i6OiIt7e3OeA0Go20bNmS5s2bk5qaisFgYPjw4fTv35/Lly8zadKkXOsbDAbatGnDypUr87yLo6MjTZo0yXPdwcEh3+/uReMif4bB9KJOfSIiIiIiIlKg7dy5k7CwsFzXsrKyCA0NZdGiRVSrVo3Hjx/j6uqa5xTCpKQk1q9fn6sfVmxsLJGRkezZs8diRVpGRgadOnWiadOmVt9pxIgRlCxZEoCrV69SuXJl5s2bx7BhwzCZTHz77bdWT2qsVasWffv2Zdq0aYwbN87inLCwMHbu3Jnr2v379wkKCqJdu3ZW3+vVV18lJSWFDz/8kL1791qd9yKlS5cmISGB2bNn065dO3MlVcmSJWnbti3e3t557vnxxx8ZOnQojRo1Yvjw4eZeZs+uOWjQIIvf+ZIlSwgODsbd3Z2+fftSqlQp85iDgwNdunTJE0LmMJlMfPbZZ3z00UcMGDCAH374AQcHBzIyMnj48CFOTk7cvHmTPXv2cOfOHSC731r79u0pXrx4nvUuXLhgMUCD7AMfxowZk28FnsifoQoxERERERERsermzZt5wjDIrhTq3r07NWrUwNbW1mLVTlZWFitWrOD27du5rpcpU4aaNWtarfSZO3dunhMLnxcTE2MOxObOnYubmxv9+/cHsquSWrVqxdq1a/PcZ29vj5ubG0+ePLHaP+z69et5wjDI7qF1/vz5fAMxHx8f1qxZQ506dQgPD+fx48f5fg5rbt26RaFChRg8eHCu7ad37txhxYoV9OvXL08o9tprr7Fo0SLCwsIYMmQI3bp14+LFi5hMJipVqkStWrWsNrgfMmQI9+7d4/fff88VhgE8efKENWvWMHz4cIv95QwGA5MnT6ZKlSoMHjyYmJgYJk6cSGRkpPnz+/j40KdPH/OWzfy21FapUgV/f38iIiJyXbezsyMoKEhhmPwfoUBMRERERERErDpz5ozVsbS0NGJiYszN5J93+fLlPGFYjri4OC5fvpwn1Ll58yaTJk1i2LBh+b5XTp+plJQUfvjhB4YOHYqLi4t53M/Pj6ysLPbu3UtCQgKQfdrhoEGDCJ86FRNAgwZ5Fw4P58TTKqZnOaSm8ua33+KSkkKSqyuuQ4bknnD8OHz4IZ/t28fkx49x7tqVV157jdXR0eYp7
vHxjJs92+pnOlGnDpueadKfnJxsDo6a7d9P6z17uFu8OPNGj2bv3r3//u6++AI2boTLlzmYlMRlg4GtI0dSKjISv+7dcz8kKwtmzoR58+DWLahSBT74APr2ZfTo0Tx69Mjiu2VlZXHkyBFefvllq+/fu3dvvLy8mDp1ap4tm9HR0SxdupQRI0a8sL+cwWCgS5cu+Pj4cPr0aVJTUyldujQNGjTI0z9N5H9LgZiIiIiIiIhYZa05+58Zz++0Q8jeOvl8IPaPf/wDJycnRo4cybJlyyzeZzAYqFKlCgA///wzDx8+ZNSoUXnm+fv7U6NGDdasWcOwYcM4ceIEZcuW5cyZM/QAGDsWAgNz3+TtTeIzIVaOVnv3Yve0l9nj1FRcnx08cQKaNoVy5bgWHMz38+bx+fnzeL/+OkWHDuVBsWIApLi4sPbVV/Os7X3pErUiI7n8tBfb81wTEmh64ABpdnbma7GxsTx58iS7yu74cQgIgD59MLi6kvrLLwzasYPkwEA2//gjCRkZlCxZknr16lF4+nSYNg3eeCP7s2/YAP36gcFAXNOm+VZf3b9/3+pYjlq1alGvXr08J3ICPHz4kFOnTtHAUhD5HIPBQI0aNahRo8YL54r8bygQExEREREREas8PT05efKk1fH8msdbawBvbXzr1q2sXr2a5cuXU7lyZRo1akR4eHie+1q2bIm7u7u5V9hLL71EpUqVcs15+PAhu3bt4vz582RmZjJkyBByWmhHRUVlT2rWDHr0yLN+8eLFc/U8K37nDvWOHWN/ixa02ruXQoUK5b5h8mRwcoLwcBxTU5k5bx5NJ02i41tv0Xr3blb37g1Aur09kf7+eZ4XcOoUjx0cuPA05Hte+x07uOHpiTErC+eUFPP1xYsX4+zsjNfYsdSpUwdX1+yYzm/oUGa3aMFb+/cTvno1l/z8uHTpEmd37uTNr7/GMHo0fPdd9iKvvw4tWsC77+L63BbF57m5ueU7DtlBnaUwLMcff/zxpwIxkf/bjC+eIiIiIiIiIgWVn5+f1W1q/v7+eHh4WL3X19fXas8qW1tbqlevbv45NTWV0aNH07ZtW/r27QtAu3btePXVVylfvjxubm5UqFCBXr160bx5cwAOHjxIREQEb775Zq61k5OT+fHHH4mKiiIzMxOAokWLsmLFCg4ePMi9ZyudkpLguX5l9erVM2/JBOi4bRvnq1Xjj6encTo5OeX+MAcOQNu2ULQoZcqUwcnJiai4OP7w8qLKxYvY5VNFVygpiQpXr3LO15fMZyrAcpS/epXqZ8+yvWPHPGN37twhJiaG3377jVmzZrFmzRquX7/OrVu3cKpWDQDHZ3qYVT5zBkN6OpnDh/97EYMBRo6EGzdwP3vWfGKoJXXq1LE6luNF2yFfNC7yn6JATERERERERKyys7Nj0KBBubY22traUr9+/Xz7SQG4uLhYbIJuMBgICgrKVSH2+eefExsby9y5c83zDQYDtWrVIjg4mH/84x8MHjw410mH3377LVWrVqVt27a51j906BBJSUkW32n//v3//iE4GNzcwNERWrWCY8eA7Ob5PXv2xNHRkepRUZS7fp2d7dpx7949yx/0yZPsCjGyDxvw8fEhLS2NdDs7bDMzKXH3rtXvqOaZMxhNJiL9/PKMGbKy6LR1Kyfq1OHu0wMErMnKzOTKkSOsmj2bk998Q8etW8kyGLj69HRKgNK3b5NmZ8cfz1fu1a+f/f+TJwkKCqLY0y2ez6pTpw5eXl4kJCSQkJBgrrZ7nqenZ76VZNoCKf8ttGVSRERERERE8uXu7k7//v1JTEzk0aNHFClSxOoJkc/z8/OjVKlSnDhxgvj4eDw8PKhTpw43btxg4cKFxMfH4+TkxJYtW/jggw+sNuh/3o0bN1i7di2zZs3CaMxd6/HsdsfnpaenU75y5eyeW507Q7FicPZsdqP5Zs0gLAxq16Zq1ar8Y8QITNWqcaBePRI8PPAqXhyAsPBw6rz8Mo6Oj
tmLVq0Khw5BZibY2FCtWjVsMjMpe/MmAG5JSdy09v2cPk1SoULEVKyYZ6zesWMUfviQnwYNeuH34ZKczDshIeafE9zc+LV7dx48fWfIrkZLLlSI1OdPvixdOvv/sbG4ubkxfPhwzp07x40bN0hPT2fy5Mns3buXP/74gwcPHgDZoWH79u3z/L6MRiOvvPIKK1euNFfnmT+rn1+ennEi/68YTNZiXREREREREZH/C7Zt28bhw4fzXG/YsCEdOnT4U2tMmjSJb775hps3b5p7Z+X44YcfuHnTWgQFMTExLFmyJPfFS5egVi1o3hy2bQMgc9IkUr/9lm9GjybdwYEG4eF03L7d+ksNHgwTJjDrq69oe+gQvufOYZOVxWN7exzT0vilZ0/OPVMh5XfqFN3Wr7e41E/9+9N97VoONmtGeOPGAAybP5+i9+9jAJ44OBBVowa727Qh3cEBY0YGXn/8QZmbN2m7Z4/FNW+WLo19ejo2Fy78extsWBi8+272/52dYejQ7FMrn+mTFhYWxrZt28zbSM0nXpYowaNDh6hoIcy7c+cOhw4d4tatW7i4uFCrVi1q1aqVb9N+kf8kVYiJiIiIiIjIf8zdu3cthmGQvdWxfv36FvuSZWZmcuzYMU6fPk1KSgpXr15l6NChecIwAB8fH6uB2O3bt6mfs0XwWd7e0KULrF2bXel1/TqGkBB2d+hA+nPVcNHe3kT6+dG6dWsKFy6cffH4cZgzB5YuZRxws0wZfm/ShOYHDmDzXKVUjkoxMQBE1qxJ9HOVVjXPnCHVyYnDT9+15K1blLx9m0xbW7Z36IBbYiKNw8IoEhfHigEDyLK1JaZyZbKeVstFVa9OjbNnCWvYkNtPK8D8IyJwz8qiUE4YduoUtGmTXeEG2VVzCxZAdDRs3Wp+l5iYGHMYluvES5OJffv2WQzESpYsSZcuXSx+bpH/BgrERERERERE5D/m4sWLLxx//hTCrKwsVq1aRXR0tPmaj48PBoOBixcvUuW50xkbNGhAVFRUnp5fRqORrVu3MmLECMsPL1cO0tLg0SP46CNSixThaoUKuMfHA+CUmgpAopsb18qX50rDhtSpVy/73gED4KOPSD91im3h4RxPS+OVjRsBOFG3Lg2OHMnzuEpPt3ae8/XNVTlW5MEDuq5fz/aOHXF92gutw7ZtmIxGEt3cuOTtzRMHBx4WLkzQpk1UunSJK89tRYyqUYPy167hmpzMTn9/TCYThSMiqJCQwMYNG3B0cqL59Ok4enjAkiVQuzb06ZPdV+2NN2DHDmjfHsg+GTLH8ydeXrt2DZPJpMov+ctRICYiIiIiIiL/MVlZWf/j8bNnz+YKw3KYTCZCQ0Px9vbO1UfM0dGR4OBgDhw4wNmzZ0lPT8fLy4vLly/z4MEDatasafnhV65kN9gvVAiuXcMlNpa3Zs/OM63uiRPUPXGCCz175rqe6ujIkvPnuZuejsFgoEZUFOm2tpyrVi1PIFb2xg3cnmn8b//kCem2tphsbHBNTMRoMtFp61Y6PVOpBVA0Lo5xs2dzqEEDdrZrR4ft26kRFZUnEAOwzcjAPSuLQYMGkZSUxKUDB7C5coXrO3aQ6OZGm99+41rPnpQ/ezb7hoAAaNAA/vEP+OUXcyBmb2+f3Xvt6YmX348YQactW7KfYeUUUZH/dvrLFRERERERkf+YSpUqsXfv3nzHn3f+/Hmr8xMTE4mNjcXT0zPXdScnJ9q3b0+7du24fPkyZ8+e5fbt23Tt2pXM27exKVs290IREbBxI3TqBEYjfPYZiTExbN2yxXyiYpULF6hz6hQZRiO2WVlUad48u+fYl19CvXr89ttv3H16omT7bdtwTEtjf7NmYKF6yi8y0vzvLhs24JCWRpbBwB9eXuxv1oyVvXubx4vdv0/b3btJcHPDZDCwrWNH4osUIcvWljslSlD61q0867+6bh12GRmUO3uWjIED2dq6NVf9/cn87TcCjx7ljJ8fNllZHM7IoMSsW
TiWLQuNG4ONTXYwdvKkeS1fX1+OHzli8cTLGjVqqDpM/pIUiImIiIiIiMh/TNmyZalevTpnc6qSnuHv70/JZ8KWHBkZGfmuaW3cZDKxfv16Tp8+DUCRIkUoUqQId1q1olTFitg1bw4lSmSfMrlgQXZT+WnTsm9u2hS3pk3x8vFh+9NG+q6JiXDqFBH16lE6OJgyDx9mn07ZpAkEBOBcrBi1bW0pd+0aAadOEefhwW8tW1L+2rVc72XIyqLGmTPcLV6c+8WKEe3jQ4qzM8Xv3aNxWBj9fv6ZH4cONff+MkZFAZDi7IxNZiYXfH3Na6Xb2+N54wadQ0O5X6wY7vHxPHR3xy0xkWQXF47XrUuT06fpdPgwPw4dyqGGDWkSFkbRp6dFNjx8GMcbN2D58uwwDLJPnTxwwPyMli1b4rJkSZ4TL402NrRu3Trf343IfysFYiIiIiIiIvIfYzAY6NatG8WKFePYsWOkpKTg4uJCYGAgzZo1s3iPl5cXFy5csDhmb29P6afB0fNOnz5tDsOedaZyZZyjoyl67BgkJkLx4tCtG/zzn9nN9Z+Kj48nOTmZmJgYfHx8sKlVC7ZuperIkRQaMiR7Uo8eULMmXL5MvTNncHjyhCcODqTZ27PgjTcw5YRMz6h05QqFHj3iQPPmHHmmX9rFatU4W706I+fNo82uXSwfOBAAu6eBn8nCZ0x1dCTLaKRCTAz+p05hzMoiwd2dIw0asL9ZM1JdXHAcNow6Q4Zkr9m/P4+dnGgYHg6Ay6NHHBwxgqb9+v17UUdHeNovDcDl8WNa7NnD5eBgCvv44G4y4e7ujnt6OkYLhxqI/BUoEBMREREREZH/KBsbG1q1akXLli1JT0/Hzs4u3213derU4ejRo8Q/bW7/rGbNmuHw3CmQOSIiIixeP9KwIUcaNmTChAk4OTlZnBMdHc0vv/xCRkYGFStWJCMjg8NOThQ/doy6dev+e6K3N7z6Kqxdyy9z5pBw5gyj58xhy0sv8cTZ2eLal729+XjKFItj8UWLcr5aNXzPncOQlYXJaCT9aZ+u7Z06cc3LK9d8A5Dq7MzcMWMsrgfg5OfHhWrVqHbuHAbgYLNmPChShF6rV7Oha1fKdO2a+4bHj+HZ72XSJAxFiuA9ezbe9vbZ11asgPv3rT5T5L+d8cVTRERERERERP7PMxgM2Nvbv7AHlYODA0OGDKF69ermua6urnTq1IkmTZpYve/Ro0f5rpuSkmLxelpaGmvXrrW4FXPLli08fPgw98Wnp1M2r1uXVnv3kujmZj6d0j0+nkLJyQC4pKRkn1j5goMFEt3csM3MxC4tDYDkp1VYhZ5pwp+jUHIySS+o0ipWrBgO3t4W13RPSSEwMDD3DbduQZky2f+Ojs7eTjp2LMTGwtWr2f89fgzp6dn/jovL9/ki/41UISYiIiIiIiL/9dzc3OjZsydpaWk8efKEQoUKvTBIK126tLnJ/fMcHR0pXLiwxbHo6GgeP35scSwrK4szZ87QtGnTf198ejplZX9/Htnb4xIXZ/F0ypdCQwGY9t57PLFSmQbgER9Puq0taU+rse6WKEGm0UiZ2FjOPnNCpjEjg1K3b3O2Rg2ra7m7u1OqVCkwGMi0s8uzZis3NzyKFPn3DWlpcOoU9OqV/fPNm9kB3tix2f89r2JFeOstmDXL6juI/DdSICYiIiIiIiJ/Gfb29tjnbNt7gYYNGxIZGUmWhYqshg0bYmOhvxdYryxzfvSIFBeX3OPPnU7p8vXXZN69S8LTKrLChQtjPHsWJk/G9O673KlUiUqurly4cgXHpCRSXFxyPaPk7dtUvXCBaB+f7NMugSeOjlypVIlap0+zv0UL0p5uEfU/fRqHtDSiqlfP84456tatizEyEjZvhk6dGDN2LLGxsTg5OWE8eRKPzZshKQlyqsx++gmSk6Fnz+yfa9aEdevyfhmTJmXfN3s2VK5s8fsS+W9mMOWcHysiIiIiIiLyN3PhwgU2b95M8tNti
zY2NtSvX5+2bdtiNFruInTjxg0WLVqU5/qgJUtIt7PDrUMHStWq9e/TKe3sIDwcnjn9MZfffoNWrWD16uwm/EBcXBwJdeuSbmfHjXLleOTiQvF796hz/DhZRiOLXn+d+8WLm5coFRvL0EWLuFe8OMfr1sUtMZFG4eH84eVlbr7/7DveKFeOlEKFaF+uHPaLF1t+xxMnoHFjqF4dhg2DGzcgJASaN4enJ2ta1bJldg+xM2fynyfyX0qBmIiIiIiIiPytZWZmcu3aNdLT0ylbtiwuz1VlPc9kMrF06VL++OOPXNfrHzpE7XPnKJmUhCHndMo2bfKcTpmHhUAM4OjAgZTeu5cicXE4PHlCirMzVypVYl+LFpgqV87Tq6zcH3/QdtcuSt+6RZq9PVE1arC7bVtzxVjOO/pFRlIkLg7HtDSMJUrk/44HD8J772WHY66u2Vslp079d8WYNQrE5C9OgZiIiIiIiIjIc1JSUli/fj3R0dHma56ennTr1g0PD4//I89ITEzkp59+4v5zpzW2a9eO6tWr88MPP7zwYABrKlasSP/+/a1uCxUp6BSIiYiIiIiIiFgRFxfHgwcPcHNzo2TJkv/H18/IyODcuXPcvHkTJycnatasSdGiRQG4d+8ey5cvJyEh4X+0pre3Nz179vzTvdZECiIFYiIiIiIiIiL/pZKTk9m1axdnzpwhMzMz37kuLi4EBgbSrFkzq/3RRCSbAjERERERERGR/3IpKSmcPHmSEydOEBcXl2usfPnyBAYG4uvrqy2SIn+SAjERERERERGRv5CMjAweP36M0WjE0dFR1WAi/wsKxEREREREREREpEBRjCwiIiIiIiIiIgWKAjERERERERERESlQFIiJiIiIiIiIiEiBokBMREREREREREQKFAViIiIiIiIiIiJSoCgQExERERERERGRAkWBmIiIiIiIiIiIFCgKxEREREREREREpEBRICYiIiIiIiIiIgWKAjERERERERERESlQFIiJiIiIiIiIiEiBokBMREREREREREQKFAViIiIiIiIiIiJSoCgQExERERERERGRAkWBmIiIiIiIiIiIFCgKxEREREREREREpEBRICYiIiIiIiIiIgWKAjERERERERERESlQFIiJiIiIiIiIiEiBokBMREREREREREQKFAViIiIiIiIiIiJSoCgQExERERERERGRAkWBmIiIiIiIiIiIFCgKxEREREREREREpEBRICYiIiIiIiIiIgWKAjERERERERERESlQFIiJiIiIiIiIiEiBokBMREREREREREQKFAViIiIiIiIiIiJSoCgQExERERERERGRAkWBmIiIiIiIiIiIFCgKxEREREREREREpEBRICYiIiIiIiIiIgWKAjERERERERERESlQFIiJiIiIiIiIiEiBokBMREREREREREQKFAViIiIiIiIiIiJSoCgQExERERERERGRAkWBmIiIiIiIiIiIFCgKxEREREREREREpEBRICYiIiIiIiIiIgWKAjERERERERERESlQFIiJiIiIiIiIiEiBokBMREREREREREQKFAViIiIiIiIiIiJSoCgQExERERERERGRAkWBmIiIiIiIiIiIFCgKxEREREREREREpEBRICYiIiIiIiIiIgWKAjERERERERERESlQFIiJiIiIiIiIiEiBokBMREREREREREQKFAViIiIiIiIiIiJSoCgQExERERERERGRAkWBmIiIiIiIiIiIFCgKxEREREREREREpEBRICYiIiIiIiIiIgWKAjERERERERERESlQFIiJiIiIiIiIiEiBokBMREREREREREQKFAViIiIiIiIiIiJSoCgQExERERERERGRAkWBmIiIiIiIiIiIFCgKxEREREREREREpEBRICYiIiIiIiIiIgWKAjERERERERERESlQFIiJiIiIiIiIiEiBokBMREREREREREQKFAViIiIiIiIiIiJSoCgQExERERERERGRAkWBmIiIi
IiIiIiIFCj/H+Xh6T+FaAD5AAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "draw_centrality_graph(second_hop_df,12, pr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's run edge betweenness centrality to find the central edges in the graph." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
betweenness_centralitysrcdst
1940.00121660383375408585
1950.00121660383375450504
1960.00121660383375497430
2070.000993108104916038337
2080.000509108104918234228
380.00038582342287398259
3070.0003041118269510810491
2910.0001301130407410810491
2030.0000871151436110810491
3010.0000871131496910810491
\n", + "
" + ], + "text/plain": [ + " betweenness_centrality src dst\n", + "194 0.001216 6038337 5408585\n", + "195 0.001216 6038337 5450504\n", + "196 0.001216 6038337 5497430\n", + "207 0.000993 10810491 6038337\n", + "208 0.000509 10810491 8234228\n", + "38 0.000385 8234228 7398259\n", + "307 0.000304 11182695 10810491\n", + "291 0.000130 11304074 10810491\n", + "203 0.000087 11514361 10810491\n", + "301 0.000087 11314969 10810491" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "G_2_hops = cugraph.from_cudf_edgelist(second_hop_df,create_using=cugraph.Graph(directed=True),source='source', destination='target')\n", + "results=cugraph.edge_betweenness_centrality(G_2_hops).sort_values(ascending=False,by=['betweenness_centrality'])\n", + "results.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now load the enrichment data. We are only loading the patent_id and title but other columns are available.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8890049" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "title_df = cudf.read_csv(\"/home/dacosta/data/g_patent.tsv\",\n", + " sep='\\t',\n", + " header=0,\n", + " usecols=[0,3],\n", + " names=[\"patent_id\", \"patent_title\"],\n", + " dtype={\"patent_id\":\"str\",\"patent_title\":str},\n", + ")\n", + "len(title_df)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
betweenness_centralitysrcdst
1940.00121660383375408585
1950.00121660383375450504
1960.00121660383375497430
2080.000993108104916038337
970.000509108104918234228
570.00038582342287398259
2910.0003041118269510810491
3230.0001301130407410810491
2030.0000871151436110810491
3330.0000871131496910810491
\n", + "
" + ], + "text/plain": [ + " betweenness_centrality src dst\n", + "194 0.001216 6038337 5408585\n", + "195 0.001216 6038337 5450504\n", + "196 0.001216 6038337 5497430\n", + "208 0.000993 10810491 6038337\n", + "97 0.000509 10810491 8234228\n", + "57 0.000385 8234228 7398259\n", + "291 0.000304 11182695 10810491\n", + "323 0.000130 11304074 10810491\n", + "203 0.000087 11514361 10810491\n", + "333 0.000087 11314969 10810491" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "G_2_hops = cugraph.from_cudf_edgelist(second_hop_df,create_using=cugraph.Graph(directed=True),source='source', destination='target')\n", + "results=cugraph.edge_betweenness_centrality(G_2_hops).sort_values(ascending=False,by=['betweenness_centrality'])\n", + "results.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Merge the enrichment data with the highest ranking patent ids found above." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['vertex'], dtype='object')\n" + ] + } + ], + "source": [ + "# top number of Patents we are interested in\n", + "k=20\n", + "\n", + "dc_top = dc.sort_values(by='degree_centrality', ascending=False).head(k)\n", + "bc_top = bc.sort_values(by='betweenness_centrality', ascending=False).head(k)\n", + "kr_top = kc.sort_values(by='katz_centrality', ascending=False).head(k)\n", + "pr_top = pr.sort_values(by='pagerank', ascending=False).head(k)\n", + "ev_top = ev.sort_values(by='eigenvector_centrality', ascending=False).head(k)\n", + "\n", + "df_list = [dc_top, bc_top, kr_top, pr_top, ev_top]\n", + "combined = cudf.concat(df_list, axis=0)\n", + "combined = cudf.DataFrame(combined['vertex'])\n", + "print(combined.columns)\n", + "combined = combined.drop_duplicates()\n", + "enriched_df = title_df.merge(combined, left_on='patent_id', right_on='vertex', 
how='inner')\n", + "enriched_df = enriched_df.drop('vertex', axis=1 )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Display the results of the enriched dataframe. We do this in pandas in order to override the default max column width." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
patent_idpatent_title
010074111Automatically determining a current value for a home
110176435Method and apparatus for combining techniques of calculus, statistics and data normalization in machine learning for analyzing large volumes of data
210228959Virtual network for virtual machine communication and migration
310318474Data storage system with heterogenous parallel processors
410379502Control system with machine learning time-series modeling
510388274Confidence checking for speech processing and query answering
610402061Productivity tools for content authoring
710432689Feature generation for online/offline machine learning
810460012Enhanced document services
910599924Semantic page segmentation of vector graphics documents
1010621019Using a client to manage remote machine learning jobs
1110649619System and method of using context in selecting a response to user device interaction
1210706450Artificial intelligence system for generating intent-aware recommendations
1310810491Real-time visualization of machine learning models
1410884769Photo-editing application recommendations
1510929916Persona based food recommendation systems and methods
1610951485System, method, and computer program for operating a multi-stage artificial intelligence (AI) analysis in a communication network
1710956132Unified code and data management for model development
1811036811Categorical data transformation and clustering for machine learning using data repository systems
1911086471Visualizing neural networks
2011151471Method and apparatus for predictive classification of actionable network alerts
2111182695Method, apparatus, and computer program product for machine learning model lifecycle management
2211304074Systems and methods for orchestration and optimization of wireless networks
2311314969Semantic page segmentation of vector graphics documents
2411347756Deep command search within and across applications
2511392998System and method for collecting and managing property information
2611412041Automatic intervention of global coordinator
2711514361Automated artificial intelligence radial visualization
2811537439Intelligent compute resource selection for machine learning training jobs
2911544604Adaptive model insights visualization engine for complex machine learning models
3011682052Machine learning systems and methods for determining home value
3111694090Debugging deep neural networks
3211710076Method, apparatus, and computer program product for machine learning model lifecycle management
3311769180Machine learning systems and methods for determining home value
3411798167Long-term and continuous animal behavioral monitoring
3511900046Intelligent feature identification and presentation
3611907191Content based log retrieval by using embedding feature extraction
3711921730Deep command search within and across applications
3811941519Machine learning training platform
3911960575Data processing for machine learning using a graphical user interface
4011977958Network-accessible machine learning model training and hosting system
415408585Internal connection method for neural networks
425450504Method for finding a most likely matching of a target facial image in a data base of facial images
435497430Method and apparatus for image recognition using invariant feature signals
446038337Method and apparatus for object recognition
456058454Method and system for automatically configuring redundant arrays of disk memory devices
466356949Automatic data collection device that receives data output instruction from data consumer
476760808Computer system and process for transferring multiple high bandwidth streams of data between multiple storage units and multiple applications in a scalable and reliable manner
487398259Training of a physical neural network
497415356Techniques for accurately synchronizing portions of an aerial image with composited visual information
507703036User interface for displaying selectable software functionality controls that are relevant to a selected object
518234228Method for training a learning machine having a deep multi-layered network with labeled and unlabeled training data
528290939Visualizing query results in stream processing systems
538311967Predictive analytical model matching
548910022Retrieval of encoded data slices and encoded instruction slices by a computing device
558948171System and method for IP multicast
569092802Statistical machine learning and business process models systems and methods
579483742Intelligent traffic analysis to detect malicious activity
589652109Predictive contextual toolbar for productivity applications
599743237Method and apparatus for predicting mobility based on relative mobile characteristics
\n", + "
" + ], + "text/plain": [ + " patent_id \\\n", + "0 10074111 \n", + "1 10176435 \n", + "2 10228959 \n", + "3 10318474 \n", + "4 10379502 \n", + "5 10388274 \n", + "6 10402061 \n", + "7 10432689 \n", + "8 10460012 \n", + "9 10599924 \n", + "10 10621019 \n", + "11 10649619 \n", + "12 10706450 \n", + "13 10810491 \n", + "14 10884769 \n", + "15 10929916 \n", + "16 10951485 \n", + "17 10956132 \n", + "18 11036811 \n", + "19 11086471 \n", + "20 11151471 \n", + "21 11182695 \n", + "22 11304074 \n", + "23 11314969 \n", + "24 11347756 \n", + "25 11392998 \n", + "26 11412041 \n", + "27 11514361 \n", + "28 11537439 \n", + "29 11544604 \n", + "30 11682052 \n", + "31 11694090 \n", + "32 11710076 \n", + "33 11769180 \n", + "34 11798167 \n", + "35 11900046 \n", + "36 11907191 \n", + "37 11921730 \n", + "38 11941519 \n", + "39 11960575 \n", + "40 11977958 \n", + "41 5408585 \n", + "42 5450504 \n", + "43 5497430 \n", + "44 6038337 \n", + "45 6058454 \n", + "46 6356949 \n", + "47 6760808 \n", + "48 7398259 \n", + "49 7415356 \n", + "50 7703036 \n", + "51 8234228 \n", + "52 8290939 \n", + "53 8311967 \n", + "54 8910022 \n", + "55 8948171 \n", + "56 9092802 \n", + "57 9483742 \n", + "58 9652109 \n", + "59 9743237 \n", + "\n", + " patent_title \n", + "0 Automatically determining a current value for a home \n", + "1 Method and apparatus for combining techniques of calculus, statistics and data normalization in machine learning for analyzing large volumes of data \n", + "2 Virtual network for virtual machine communication and migration \n", + "3 Data storage system with heterogenous parallel processors \n", + "4 Control system with machine learning time-series modeling \n", + "5 Confidence checking for speech processing and query answering \n", + "6 Productivity tools for content authoring \n", + "7 Feature generation for online/offline machine learning \n", + "8 Enhanced document services \n", + "9 Semantic page segmentation of vector graphics documents \n", + "10 Using a client to 
manage remote machine learning jobs \n", + "11 System and method of using context in selecting a response to user device interaction \n", + "12 Artificial intelligence system for generating intent-aware recommendations \n", + "13 Real-time visualization of machine learning models \n", + "14 Photo-editing application recommendations \n", + "15 Persona based food recommendation systems and methods \n", + "16 System, method, and computer program for operating a multi-stage artificial intelligence (AI) analysis in a communication network \n", + "17 Unified code and data management for model development \n", + "18 Categorical data transformation and clustering for machine learning using data repository systems \n", + "19 Visualizing neural networks \n", + "20 Method and apparatus for predictive classification of actionable network alerts \n", + "21 Method, apparatus, and computer program product for machine learning model lifecycle management \n", + "22 Systems and methods for orchestration and optimization of wireless networks \n", + "23 Semantic page segmentation of vector graphics documents \n", + "24 Deep command search within and across applications \n", + "25 System and method for collecting and managing property information \n", + "26 Automatic intervention of global coordinator \n", + "27 Automated artificial intelligence radial visualization \n", + "28 Intelligent compute resource selection for machine learning training jobs \n", + "29 Adaptive model insights visualization engine for complex machine learning models \n", + "30 Machine learning systems and methods for determining home value \n", + "31 Debugging deep neural networks \n", + "32 Method, apparatus, and computer program product for machine learning model lifecycle management \n", + "33 Machine learning systems and methods for determining home value \n", + "34 Long-term and continuous animal behavioral monitoring \n", + "35 Intelligent feature identification and presentation \n", + "36 Content based 
log retrieval by using embedding feature extraction \n", + "37 Deep command search within and across applications \n", + "38 Machine learning training platform \n", + "39 Data processing for machine learning using a graphical user interface \n", + "40 Network-accessible machine learning model training and hosting system \n", + "41 Internal connection method for neural networks \n", + "42 Method for finding a most likely matching of a target facial image in a data base of facial images \n", + "43 Method and apparatus for image recognition using invariant feature signals \n", + "44 Method and apparatus for object recognition \n", + "45 Method and system for automatically configuring redundant arrays of disk memory devices \n", + "46 Automatic data collection device that receives data output instruction from data consumer \n", + "47 Computer system and process for transferring multiple high bandwidth streams of data between multiple storage units and multiple applications in a scalable and reliable manner \n", + "48 Training of a physical neural network \n", + "49 Techniques for accurately synchronizing portions of an aerial image with composited visual information \n", + "50 User interface for displaying selectable software functionality controls that are relevant to a selected object \n", + "51 Method for training a learning machine having a deep multi-layered network with labeled and unlabeled training data \n", + "52 Visualizing query results in stream processing systems \n", + "53 Predictive analytical model matching \n", + "54 Retrieval of encoded data slices and encoded instruction slices by a computing device \n", + "55 System and method for IP multicast \n", + "56 Statistical machine learning and business process models systems and methods \n", + "57 Intelligent traffic analysis to detect malicious activity \n", + "58 Predictive contextual toolbar for productivity applications \n", + "59 Method and apparatus for predicting mobility based on relative mobile 
characteristics " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "pd.set_option('display.max_colwidth', None)\n", + "enriched_df.to_pandas()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k2DfAaZaDIBj" + }, + "source": [ + "---\n", + "U.S. Patent and Trademark Office. “Data Download Tables.” PatentsView. Accessed [10/06/2024]. https://patentsview.org/download/data-download-tables.\n", + "\n", + "Data used is licensed under Creative Commons 4.0 \n", + "https://creativecommons.org/licenses/by/4.0/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "___\n", + "Copyright (c) 2024, NVIDIA CORPORATION.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License.\n", + "___" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/python/.coveragerc b/python/.coveragerc index 1c33570c05c..9e15f7d1acc 100644 --- a/python/.coveragerc +++ b/python/.coveragerc @@ -1,10 +1,8 @@ # Configuration file for Python coverage tests [run] include = cugraph/cugraph/* - cugraph-pyg/cugraph_pyg/* cugraph-service/* pylibcugraph/pylibcugraph/* omit = cugraph/cugraph/tests/* - cugraph-pyg/cugraph_pyg/tests/* cugraph-service/tests/* pylibcugraph/pylibcugraph/tests/* diff --git a/python/cugraph-dgl/LICENSE b/python/cugraph-dgl/LICENSE deleted file mode 120000 index 30cff7403da..00000000000 --- a/python/cugraph-dgl/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../../LICENSE \ No newline at end of file diff --git a/python/cugraph-dgl/README.md b/python/cugraph-dgl/README.md deleted file mode 100644 index 013d4fe5e2e..00000000000 --- a/python/cugraph-dgl/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# cugraph_dgl - -## Description - -[RAPIDS](https://rapids.ai) cugraph_dgl provides a duck-typed version of the [DGLGraph](https://docs.dgl.ai/api/python/dgl.DGLGraph.html#dgl.DGLGraph) class, which uses cugraph for storing graph structure and node/edge feature data. Using cugraph as the backend allows DGL users to access a collection of GPU accelerated algorithms for graph analytics, such as centrality computation and community detection. 
- -## Conda - -Install and update cugraph-dgl and the required dependencies using the command: - -```shell -# CUDA 11 -conda install -c rapidsai -c pytorch -c conda-forge -c nvidia -c dglteam/label/th23_cu118 cugraph-dgl - -# CUDA 12 -conda install -c rapidsai -c pytorch -c conda-forge -c nvidia -c dglteam/label/th23_cu121 cugraph-dgl -``` - -## Build from Source - -### Create the conda development environment -``` -mamba env create -n cugraph_dgl_dev --file conda/cugraph_dgl_dev_11.6.yml -``` - -### Install in editable mode -``` -pip install -e . -``` - -### Run tests - -``` -pytest tests/* -``` - - -## Usage -```diff - -+from cugraph_dgl.convert import cugraph_storage_from_heterograph -+cugraph_g = cugraph_storage_from_heterograph(dgl_g) - -sampler = dgl.dataloading.NeighborSampler( - [15, 10, 5], prefetch_node_feats=['feat'], prefetch_labels=['label']) - -train_dataloader = dgl.dataloading.DataLoader( -- dgl_g, -+ cugraph_g, -train_idx, -sampler, -device=device, -batch_size=1024, -shuffle=True, -drop_last=False, -num_workers=0) -``` diff --git a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml deleted file mode 100644 index 85c85c2043a..00000000000 --- a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../../dependencies.yaml and run `rapids-dependency-file-generator`. 
-channels: -- rapidsai -- rapidsai-nightly -- dask/label/dev -- dglteam/label/th23_cu118 -- conda-forge -- nvidia -dependencies: -- cugraph==24.12.*,>=0.0.0a0 -- dgl>=2.4.0.cu* -- pandas -- pre-commit -- pylibcugraphops==24.12.*,>=0.0.0a0 -- pytest -- pytest-benchmark -- pytest-cov -- pytest-xdist -- pytorch-cuda==11.8 -- pytorch>=2.3 -- scipy -- tensordict>=0.1.2 -name: cugraph_dgl_dev_cuda-118 diff --git a/python/cugraph-dgl/cugraph_dgl/VERSION b/python/cugraph-dgl/cugraph_dgl/VERSION deleted file mode 120000 index d62dc733efd..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/VERSION +++ /dev/null @@ -1 +0,0 @@ -../../../VERSION \ No newline at end of file diff --git a/python/cugraph-dgl/cugraph_dgl/__init__.py b/python/cugraph-dgl/cugraph_dgl/__init__.py deleted file mode 100644 index 58850d47fba..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os - -# to prevent rapids context being created when importing cugraph_dgl -os.environ["RAPIDS_NO_INITIALIZE"] = "1" -from cugraph_dgl.graph import Graph -from cugraph_dgl.cugraph_storage import CuGraphStorage -from cugraph_dgl.convert import ( - cugraph_storage_from_heterograph, - cugraph_dgl_graph_from_heterograph, -) -import cugraph_dgl.dataloading -import cugraph_dgl.nn - -from cugraph_dgl._version import __git_commit__, __version__ diff --git a/python/cugraph-dgl/cugraph_dgl/_version.py b/python/cugraph-dgl/cugraph_dgl/_version.py deleted file mode 100644 index e8adcc31430..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/_version.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import importlib.resources - -__version__ = ( - importlib.resources.files(__package__).joinpath("VERSION").read_text().strip() -) -try: - __git_commit__ = ( - importlib.resources.files(__package__) - .joinpath("GIT_COMMIT") - .read_text() - .strip() - ) -except FileNotFoundError: - __git_commit__ = "" - -__all__ = ["__git_commit__", "__version__"] diff --git a/python/cugraph-dgl/cugraph_dgl/convert.py b/python/cugraph-dgl/cugraph_dgl/convert.py deleted file mode 100644 index ae4b96dd391..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/convert.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import annotations -from cugraph.utilities.utils import import_optional - -import cugraph_dgl -from cugraph_dgl import CuGraphStorage -from cugraph_dgl.utils.cugraph_conversion_utils import ( - get_edges_dict_from_dgl_HeteroGraph, - add_ndata_from_dgl_HeteroGraph, - add_edata_from_dgl_HeteroGraph, -) - -dgl = import_optional("dgl") - - -def cugraph_storage_from_heterograph( - g: dgl.DGLGraph, single_gpu: bool = True -) -> CuGraphStorage: - """ - Convert DGL Graph to CuGraphStorage graph - """ - num_nodes_dict = {ntype: g.num_nodes(ntype) for ntype in g.ntypes} - edges_dict = get_edges_dict_from_dgl_HeteroGraph(g, single_gpu) - gs = CuGraphStorage( - data_dict=edges_dict, - num_nodes_dict=num_nodes_dict, - single_gpu=single_gpu, - idtype=g.idtype, - ) - add_ndata_from_dgl_HeteroGraph(gs, g) - add_edata_from_dgl_HeteroGraph(gs, g) - return gs - - -def cugraph_dgl_graph_from_heterograph( - input_graph: dgl.DGLGraph, - single_gpu: bool = True, - ndata_storage: str = "torch", - edata_storage: str = "torch", - **kwargs, -) -> cugraph_dgl.Graph: - """ - Converts a DGL Graph to a cuGraph-DGL Graph. 
- """ - - output_graph = cugraph_dgl.Graph( - is_multi_gpu=(not single_gpu), - ndata_storage=ndata_storage, - edata_storage=edata_storage, - **kwargs, - ) - - # Calling is_homogeneous does not work here - if len(input_graph.ntypes) <= 1: - output_graph.add_nodes( - input_graph.num_nodes(), data=input_graph.ndata, ntype=input_graph.ntypes[0] - ) - else: - for ntype in input_graph.ntypes: - data = { - k: v_dict[ntype] - for k, v_dict in input_graph.ndata.items() - if ntype in v_dict - } - output_graph.add_nodes(input_graph.num_nodes(ntype), data=data, ntype=ntype) - - if len(input_graph.canonical_etypes) <= 1: - can_etype = input_graph.canonical_etypes[0] - src_t, dst_t = input_graph.edges(form="uv", etype=can_etype) - output_graph.add_edges(src_t, dst_t, input_graph.edata, etype=can_etype) - else: - for can_etype in input_graph.canonical_etypes: - data = { - k: v_dict[can_etype] - for k, v_dict in input_graph.edata.items() - if can_etype in v_dict - } - - src_t, dst_t = input_graph.edges(form="uv", etype=can_etype) - output_graph.add_edges(src_t, dst_t, data=data, etype=can_etype) - - return output_graph diff --git a/python/cugraph-dgl/cugraph_dgl/cugraph_storage.py b/python/cugraph-dgl/cugraph_dgl/cugraph_storage.py deleted file mode 100644 index 6a1b6ee32b8..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/cugraph_storage.py +++ /dev/null @@ -1,714 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import annotations -from typing import Optional, Sequence, Tuple, Dict, Union -from functools import cached_property -from cugraph.utilities.utils import import_optional, MissingModule -from cugraph.gnn import FeatureStore -from cugraph.gnn.dgl_extensions.dgl_uniform_sampler import DGLUniformSampler -import cudf -import dask_cudf -import cupy as cp -from cugraph_dgl.utils.cugraph_storage_utils import ( - _assert_valid_canonical_etype, - backend_dtype_to_np_dtype_dict, - add_edge_ids_to_edges_dict, - add_node_offset_to_edges_dict, -) -from cugraph_dgl.utils.feature_storage import dgl_FeatureStorage - -dgl = import_optional("dgl") -F = import_optional("dgl.backend") -torch = import_optional("torch") - - -class CuGraphStorage: - """ - Duck-typed version of the DGLHeteroGraph class made for cuGraph - for storing graph structure and node/edge feature data. - - This object is wrapper around cugraph's Multi GPU MultiGraph and returns samples - that conform with `DGLHeteroGraph` - See: https://docs.rapids.ai/api/cugraph/nightly/api_docs/cugraph_dgl.html - """ - - def __init__( - self, - data_dict: Dict[ - Tuple[str, str, str], Union[cudf.DataFrame, dask_cudf.DataFrame] - ], - num_nodes_dict: Dict[str, int], - single_gpu: bool = True, - device_id: int = 0, - idtype=None if isinstance(F, MissingModule) else F.int64, - ): - """ - Constructor for creating a object of instance CuGraphStorage - - See also ``cugraph_dgl.cugraph_storage_from_heterograph`` - to convert from DGLHeteroGraph to CuGraphStorage - - Parameters - ---------- - data_dict: - The dictionary data for constructing a heterogeneous graph. - The keys are in the form of string triplets (src_type, edge_type, dst_type), - specifying the source node, edge, and destination node types. - The values are graph data is a dataframe with 2 columns form of (𝑈,𝑉), - where (𝑈[𝑖],𝑉[𝑖]) forms the edge with ID 𝑖. 
- - num_nodes_dict: dict[str, int] - The number of nodes for some node types, which is a - dictionary mapping a node type T to the number of T-typed nodes. - - single_gpu: bool - Whether to create the cugraph Property Graph - on a single GPU or multiple GPUs - single GPU = True - single GPU = False - - device_id: int - If specified, must be the integer ID of the GPU device to have the - results being created on - - idtype: Framework-specific device object, - The data type for storing the structure-related graph - information this can be ``torch.int32`` or ``torch.int64`` - for PyTorch. - Defaults to ``torch.int64`` if pytorch is installed - - - Examples - -------- - The following example uses `CuGraphStorage` : - >>> from cugraph_dgl.cugraph_storage import CuGraphStorage - >>> import cudf - >>> import torch - >>> num_nodes_dict={"drug": 3, "gene": 2, "disease": 1} - >>> drug_interacts_drug_df = cudf.DataFrame({"src": [0, 1], "dst": [1, 2]}) - >>> drug_interacts_gene = cudf.DataFrame({"src": [0, 1], "dst": [0, 1]}) - >>> drug_treats_disease = cudf.DataFrame({"src": [1], "dst": [0]}) - >>> data_dict = {("drug", "interacts", "drug"):drug_interacts_drug_df, - ("drug", "interacts", "gene"):drug_interacts_gene, - ("drug", "treats", "disease"):drug_treats_disease } - >>> gs = CuGraphStorage(data_dict=data_dict, num_nodes_dict=num_nodes_dict) - >>> gs.add_node_data(ntype='drug', feat_name='node_feat', - feat_obj=torch.as_tensor([0.1, 0.2, 0.3])) - >>> gs.add_edge_data(canonical_etype=("drug", "interacts", "drug"), - feat_name='edge_feat', - feat_obj=torch.as_tensor([0.2, 0.4])) - >>> gs.ntypes - ['disease', 'drug', 'gene'] - >>> gs.etypes - ['interacts', 'interacts', 'treats'] - >>> gs.canonical_etypes - [('drug', 'interacts', 'drug'), - ('drug', 'interacts', 'gene'), - ('drug', 'treats', 'disease')] - - >>> gs.sample_neighbors({'disease':[0]}, - 1) - Graph(num_nodes={'disease': 1, 'drug': 3, 'gene': 2}, - num_edges={('drug', 'interacts', 'drug'): 0, - ('drug', 
'interacts', 'gene'): 0, - ('drug', 'treats', 'disease'): 1}, - metagraph=[('drug', 'drug', 'interacts'), - ('drug', 'gene', 'interacts'), - ('drug', 'disease', 'treats')]) - - >>> gs.get_node_storage(key='node_feat', - ntype='drug').fetch([0,1,2]) - tensor([0.1000, 0.2000, 0.3000], device='cuda:0', - dtype=torch.float64) - - >>> es = gs.get_edge_storage(key='edge_feat', - etype=('drug', 'interacts', 'drug')) - >>> es.fetch([0,1]) - tensor([0.2000, 0.4000], device='cuda:0', dtype=torch.float64) - """ - # Order is very important - # do this first before cuda work - # Create cuda context on the right gpu, - # defaults to gpu-0 - import numba.cuda as cuda - - cuda.select_device(device_id) - - self.idtype = idtype - self.id_np_type = backend_dtype_to_np_dtype_dict[idtype] - self.num_nodes_dict = num_nodes_dict - self._ntype_offset_d = self.__get_ntype_offset_d(self.num_nodes_dict) - # Todo: Can possibly optimize by persisting edge-list - # Trade-off memory for run-time - self.num_edges_dict = {k: len(v) for k, v in data_dict.items()} - self._etype_offset_d = self.__get_etype_offset_d(self.num_edges_dict) - self.single_gpu = single_gpu - - self.ndata_storage = FeatureStore(backend="torch") - self.ndata = self.ndata_storage.fd - self.edata_storage = FeatureStore(backend="torch") - self.edata = self.edata_storage.fd - - self._etype_range_d = self.__get_etype_range_d( - self._etype_offset_d, self.num_canonical_edges_dict - ) - _edges_dict = add_edge_ids_to_edges_dict( - data_dict, self._etype_offset_d, self.id_np_type - ) - - self._edges_dict = add_node_offset_to_edges_dict( - _edges_dict, self._ntype_offset_d - ) - - # Persist the dataframes so they can be retrieved later - # for a multi-GPU workflow. 
- if not single_gpu: - for k in list(self._edges_dict.keys()): - self._edges_dict[k] = self._edges_dict[k].persist() - - self._etype_id_dict = { - etype: etype_id for etype_id, etype in enumerate(self.canonical_etypes) - } - self.uniform_sampler = None - - def add_node_data(self, feat_obj: Sequence, ntype: str, feat_name: str): - """ - Add node features - - Parameters - ---------- - df : array_like object - The node feature to save in feature store - ntype : str - The node type to be added. - For example, if dataframe contains data about users, ntype - might be "users". - feat_name : str - The name of the feature being stored - Returns - ------- - None - """ - self.ndata_storage.add_data( - feat_obj=feat_obj, - type_name=ntype, - feat_name=feat_name, - ) - - def add_edge_data( - self, - feat_obj: Sequence, - canonical_etype: Tuple[str, str, str], - feat_name: str, - ): - """ - Add edge features - - Parameters - ---------- - feat_obj : array_like object - The edge feature to save in feature store - canonical_etype : Tuple[(str, str, str)] - The edge type to be added - feat_name : string - Returns - ------- - None - """ - _assert_valid_canonical_etype(canonical_etype) - self.edata_storage.add_data( - feat_obj=feat_obj, - type_name=canonical_etype, - feat_name=feat_name, - ) - - # Sampling Function - def sample_neighbors( - self, - nodes, - fanout: int, - edge_dir: str = "in", - prob: Optional[str] = None, - exclude_edges=None, - replace: bool = False, - output_device=None, - ): - """ - Return a DGLGraph which is a subgraph induced by sampling neighboring - edges of the given nodes. - See ``dgl.sampling.sample_neighbors`` for detailed semantics. - Parameters - ---------- - nodes : Tensor or dict[str, Tensor] - Node IDs to sample neighbors from. - This argument can take a single ID tensor or a dictionary of node - types and ID tensors. If a single tensor is given, the graph must - only have one type of nodes. 
- fanout : int or dict[etype, int] - The number of edges to be sampled for each node on each edge type. - This argument can take a single int or a dictionary of edge types - and ints. If a single int is given, DGL will sample this number of - edges for each node for every edge type. - If -1 is given for a single edge type, all the neighboring edges - with that edge type will be selected. - edge_dir: 'in' or 'out' - The direction of edges to import - prob : str, optional - Feature name used as the (un-normalized) probabilities associated - with each neighboring edge of a node. The feature must have only - one element for each edge. - The features must be non-negative floats, and the sum of the - features of inbound/outbound edges for every node must be positive - (though they don't have to sum up to one). Otherwise, the result - will be undefined. If :attr:`prob` is not None, GPU sampling is - not supported. - exclude_edges: tensor or dict - Edge IDs to exclude during sampling neighbors for the seed nodes. - This argument can take a single ID tensor or a dictionary of edge - types and ID tensors. If a single tensor is given, the graph must - only have one type of nodes. - replace : bool, optional - If True, sample with replacement. - output_device : Framework-specific device context object, optional - The output device. Default is the same as the input graph. - Returns - ------- - DGLGraph - A sampled subgraph with the same nodes as the original graph, but - only the sampled neighboring edges. The induced edge IDs will be - in ``edata[dgl.EID]``. 
- """ - if self.uniform_sampler is None: - self.uniform_sampler = DGLUniformSampler( - self._edges_dict, - self._etype_range_d, - self._etype_id_dict, - self.single_gpu, - ) - - if prob is not None: - raise NotImplementedError( - "prob is not currently supported", - " for sample_neighbors in CuGraphStorage", - ) - - if exclude_edges is not None: - raise NotImplementedError( - "exclude_edges is not currently supported", - " for sample_neighbors in CuGraphStorage", - ) - - if not isinstance(nodes, dict): - if len(self.ntypes) > 1: - raise dgl.DGLError( - "Must specify node type when the graph is not homogeneous." - ) - nodes = cp.asarray(nodes) - nodes = {self.ntypes[0]: nodes} - else: - nodes = { - k: self.dgl_n_id_to_cugraph_id(F.tensor(n), k) for k, n in nodes.items() - } - nodes = {k: cp.asarray(F.tensor(n)) for k, n in nodes.items()} - - sampled_obj = self.uniform_sampler.sample_neighbors( - nodes, - fanout, - edge_dir=edge_dir, - prob=prob, - replace=replace, - ) - # heterograph case - if len(self.etypes) > 1: - graph_data_d, graph_eid_d = self.__convert_to_dgl_tensor_d( - sampled_obj, self.idtype - ) - sampled_graph = dgl.heterograph( - data_dict=graph_data_d, - num_nodes_dict=self.num_nodes_dict, - idtype=self.idtype, - ) - sampled_graph.edata[dgl.EID] = graph_eid_d - else: - src_ids, dst_ids, edge_ids = sampled_obj - src_ids = torch.as_tensor(src_ids, device="cuda") - dst_ids = torch.as_tensor(dst_ids, device="cuda") - edge_ids = torch.as_tensor(edge_ids, device="cuda") - total_number_of_nodes = self.total_number_of_nodes - sampled_graph = dgl.graph( - (src_ids, dst_ids), - num_nodes=total_number_of_nodes, - idtype=self.idtype, - ) - sampled_graph.edata[dgl.EID] = edge_ids - - # to device function move the dgl graph to desired devices - if output_device is not None: - sampled_graph.to(output_device) - return sampled_graph - - # Required in Cluster-GCN - def subgraph(self, nodes, relabel_nodes=False, output_device=None): - """Return a subgraph induced on 
given nodes. - This has the same semantics as ``dgl.node_subgraph``. - Parameters - ---------- - nodes : nodes or dict[str, nodes] - The nodes to form the subgraph. The allowed nodes formats are: - * Int Tensor: Each element is a node ID. The tensor must have the - same device type and ID data type as the graph's. - * iterable[int]: Each element is a node ID. - * Bool Tensor: Each :math:`i^{th}` element is a bool flag - indicating whether node :math:`i` is in the subgraph. - If the graph is homogeneous, directly pass the above formats. - Otherwise, the argument must be a dictionary with keys being - node types and values being the node IDs in the above formats. - relabel_nodes : bool, optional - If True, the extracted subgraph will only have the nodes in the - specified node set and it will relabel the nodes in order. - output_device : Framework-specific device context object, optional - The output device. Default is the same as the input graph. - Returns - ------- - DGLGraph - The subgraph. - """ - raise NotImplementedError("subgraph is not implemented yet") - - # Required in Link Prediction - # relabel = F we use dgl functions, - # relabel = T, we need to delete nodes and relabel - def edge_subgraph(self, edges, relabel_nodes=False, output_device=None): - """ - Return a subgraph induced on given edges. - This has the same semantics as ``dgl.edge_subgraph``. - Parameters - ---------- - edges : edges or dict[(str, str, str), edges] - The edges to form the subgraph. The allowed edges formats are: - * Int Tensor: Each element is an edge ID. The tensor must have the - same device type and ID data type as the graph's. - * iterable[int]: Each element is an edge ID. - * Bool Tensor: Each :math:`i^{th}` element is a bool flag - indicating whether edge :math:`i` is in the subgraph. - If the graph is homogeneous, one can directly pass the above - formats. 
Otherwise, the argument must be a dictionary with keys - being edge types and values being the edge IDs in the above formats - relabel_nodes : bool, optional - If True, the extracted subgraph will only have the nodes in the - specified node set and it will relabel the nodes in order. - output_device : Framework-specific device context object, optional - The output device. Default is the same as the input graph. - Returns - ------- - DGLGraph - The subgraph. - """ - raise NotImplementedError("edge_subgraph is not implemented yet") - - # Required in Link Prediction negative sampler - def find_edges( - self, eid, etype: Optional[Tuple[str, str, str]] = None, output_device=None - ): - """ - Return the source and destination node ID(s) given the edge ID(s). - - Parameters - ---------- - eid : edge ID(s) - The edge IDs. The allowed formats are: - - * ``int``: A single ID. - * Int Tensor: Each element is an ID. - The tensor must have the same device type - and ID data type as the graph's. - * iterable[int]: Each element is an ID. - - etype : Tuple[str, str, str] - The type name of the edges. - Can be omitted if the graph has only one type of edges. - - Returns - ------- - Tensor - The source node IDs of the edges. - The i-th element is the source node ID of the i-th edge. - Tensor - The destination node IDs of the edges. - The i-th element is the destination node ID of the i-th edge. 
- """ - - if etype: - src_type, connection_type, dst_type = etype - eid = self.dgl_e_id_to_cugraph_id(eid, etype) - # TODO: implement below - src, dst = self.find_edges(eid, etype) - src = torch.as_tensor(src, device="cuda") - dst = torch.as_tensor(dst, device="cuda") - src = self.cugraph_n_id_to_dgl_id(src, src_type) - dst = self.cugraph_n_id_to_dgl_id(dst, dst_type) - - return src, dst - - # Required in Link Prediction negative sampler - def global_uniform_negative_sampling( - self, num_samples, exclude_self_loops=True, replace=False, etype=None - ): - """ - Per source negative sampling as in ``dgl.dataloading.GlobalUniform`` - """ - raise NotImplementedError( - "global_uniform_negative_sampling not implemented yet" - ) - - def get_node_storage(self, key: str, ntype: str = None): - """ - Get storage object of node feature of - type :attr:`ntype` and name :attr:`key` - """ - if ntype is None: - if len(self.ntypes) > 1: - raise ValueError( - "ntype must be provided if multiple ntypes are present in the graph" - ) - else: - ntype = self.ntype[0] - return dgl_FeatureStorage(self.ndata_storage, type_name=ntype, feat_name=key) - - def get_edge_storage(self, key: str, etype: Optional[Tuple[str, str, str]] = None): - """ - Get storage object of edge feature of - type :attr:`ntype` and name :attr:`key` - """ - if etype is None: - if len(self.etypes) > 1: - raise ValueError( - "etype must be provided if multiple etypes are present in the graph" - ) - else: - etype = self.etypes[0] - return dgl_FeatureStorage(self.edata_storage, type_name=etype, feat_name=key) - - # Number of edges/nodes utils - def num_nodes(self, ntype: str = None) -> int: - """ - Return the number of nodes in the graph. - Parameters - ---------- - ntype : str, optional - The node type name. If given, it returns the number of nodes of the - type. - If not given (default), it returns the total number of nodes - of all types. - - Returns - ------- - int - The number of nodes. 
- """ - if ntype: - return self.num_nodes_dict[ntype] - else: - return self.total_number_of_nodes - - def number_of_nodes(self, ntype: str = None) -> int: - """ - Return the number of nodes in the graph. - Alias of ``num_nodes`` - Parameters - ---------- - ntype : str, optional - The node type name. If given, it returns the number of nodes of the - type. - If not given (default), it returns the total number of nodes - of all types. - - Returns - ------- - int - The number of nodes. - """ - return self.num_nodes(ntype) - - @property - def ntypes(self) -> Sequence[str]: - """ - Return all the node type names in the graph. - - Returns - ------- - list[str] - All the node type names in a list. - """ - ntypes = list(self.num_nodes_dict.keys()) - return ntypes - - @property - def etypes(self) -> Sequence[str]: - """ - Return all the edge type names in the graph. - - Returns - ------- - list[str] - All the edge type names in a list. - """ - - return [can_etype[1] for can_etype in self.canonical_etypes] - - def num_edges(self, etype: Optional[str] = None) -> int: - """ - Return the number of edges in the graph. - Parameters - ---------- - etype: - - Returns - ------- - int - The number of edges - """ - if etype: - if etype not in self.canonical_etypes: - etype = self.get_corresponding_canonical_etype(etype) - return self.num_edges_dict[etype] - else: - return self.total_number_of_edges - - @cached_property - def total_number_of_edges(self) -> int: - return sum(self.num_edges_dict.values()) - - @cached_property - def total_number_of_nodes(self) -> int: - return sum(self.num_nodes_dict.values()) - - @property - def num_canonical_edges_dict(self) -> dict[str, int]: - return self.num_edges_dict - - @property - def canonical_etypes(self) -> Sequence[Tuple[str, str, str]]: - return list(self.num_edges_dict.keys()) - - @property - def device(self): - """ - Get the device of the graph. 
- Returns - ------- - device context - The device of the graph, which should be a - framework-specific device object (e.g., ``torch.device``). - """ - return torch.cuda.current_device() - - # Index Conversion Utils - def get_node_id_offset(self, ntype: str) -> int: - """ - Return the integer offset for node id of type ntype - """ - return self._ntype_offset_d[ntype] - - def get_edge_id_offset(self, canonical_etype: Tuple[str, str, str]) -> int: - """ - Return the integer offset for node id of type etype - """ - _assert_valid_canonical_etype(canonical_etype) - return self._etype_offset_d[canonical_etype] - - def dgl_n_id_to_cugraph_id(self, index_t, ntype: str): - return index_t + self.get_node_id_offset(ntype) - - def cugraph_n_id_to_dgl_id(self, index_t, ntype: str): - return index_t - self.get_node_id_offset(ntype) - - def dgl_e_id_to_cugraph_id(self, index_t, canonical_etype: Tuple[str, str, str]): - return index_t + self.get_edge_id_offset(canonical_etype) - - def cugraph_e_id_to_dgl_id(self, index_t, canonical_etype: Tuple[str, str, str]): - return index_t - self.get_edge_id_offset(canonical_etype) - - # Methods for getting the offsets per type - @staticmethod - def __get_etype_offset_d(num_canonical_edges_dict): - last_st = 0 - etype_st_d = {} - for etype in sorted(num_canonical_edges_dict.keys()): - etype_st_d[etype] = last_st - last_st = last_st + num_canonical_edges_dict[etype] - return etype_st_d - - @staticmethod - def __get_etype_range_d(etype_offset_d, num_canonical_edges_dict): - # dict for edge_id_offset_start - etype_range_d = {} - for etype, st in etype_offset_d.items(): - etype_range_d[etype] = (st, st + num_canonical_edges_dict[etype]) - return etype_range_d - - @staticmethod - def __get_ntype_offset_d(num_nodes_dict): - # dict for node_id_offset_start - last_st = 0 - ntype_st_d = {} - for ntype in sorted(num_nodes_dict.keys()): - ntype_st_d[ntype] = last_st - last_st = last_st + num_nodes_dict[ntype] - return ntype_st_d - - def 
get_corresponding_canonical_etype(self, etype: str) -> str: - can_etypes = [ - can_etype for can_etype in self.canonical_etypes if can_etype[1] == etype - ] - if len(can_etypes) > 1: - raise dgl.DGLError( - f'Edge type "{etype}" is ambiguous. Please use canonical' - + "edge type in the form of (srctype, etype, dsttype)" - ) - return can_etypes[0] - - def __convert_to_dgl_tensor_d( - self, - graph_sampled_data_d, - o_dtype=None if isinstance(F, MissingModule) else F.int64, - ): - - graph_data_d = {} - graph_eid_d = {} - for canonical_etype, ( - src, - dst, - edge_id, - ) in graph_sampled_data_d.items(): - src_type = canonical_etype[0] - dst_type = canonical_etype[2] - - src_t = _torch_tensor_from_cp_array(src) - dst_t = _torch_tensor_from_cp_array(dst) - edge_id_t = _torch_tensor_from_cp_array(edge_id) - - src_t = self.cugraph_n_id_to_dgl_id(src_t, src_type) - dst_t = self.cugraph_n_id_to_dgl_id(dst_t, dst_type) - edge_id_t = self.cugraph_e_id_to_dgl_id(edge_id_t, canonical_etype) - graph_data_d[canonical_etype] = (src_t.to(o_dtype), dst_t.to(o_dtype)) - graph_eid_d[canonical_etype] = edge_id_t.to(o_dtype) - - return graph_data_d, graph_eid_d - - -def _torch_tensor_from_cp_array(ar): - if len(ar) == 0: - return torch.as_tensor(ar.get()).to("cuda") - return torch.as_tensor(ar, device="cuda") diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py b/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py deleted file mode 100644 index 8a2e9cd954d..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -from cugraph_dgl.dataloading.dataset import ( - HomogenousBulkSamplerDataset, - HeterogenousBulkSamplerDataset, -) - -from cugraph_dgl.dataloading.sampler import Sampler -from cugraph_dgl.dataloading.neighbor_sampler import NeighborSampler - -from cugraph_dgl.dataloading.dask_dataloader import DaskDataLoader -from cugraph_dgl.dataloading.dataloader import DataLoader as FutureDataLoader - - -def DataLoader(*args, **kwargs): - warnings.warn( - "DataLoader has been renamed to DaskDataLoader. " - "In Release 24.10, cugraph_dgl.dataloading.FutureDataLoader " - "will take over the DataLoader name.", - FutureWarning, - ) - return DaskDataLoader(*args, **kwargs) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py deleted file mode 100644 index e220b93f738..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py +++ /dev/null @@ -1,321 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import annotations -import os -import shutil -import cugraph_dgl -import cupy as cp -import cudf -from cugraph.utilities.utils import import_optional -from cugraph.gnn import BulkSampler -from dask.distributed import default_client, Event -from cugraph_dgl.dataloading import ( - HomogenousBulkSamplerDataset, - HeterogenousBulkSamplerDataset, -) -from cugraph_dgl.dataloading.utils.extract_graph_helpers import ( - create_cugraph_graph_from_edges_dict, -) - -dgl = import_optional("dgl") -torch = import_optional("torch") - - -class DaskDataLoader(torch.utils.data.DataLoader): - """ - Sampled graph data loader. Wrap a :class:`~cugraph_dgl.CuGraphStorage` and a - :class:`~cugraph_dgl.dataloading.NeighborSampler` into - an iterable over mini-batches of samples. cugraph_dgl's ``DataLoader`` extends - PyTorch's ``DataLoader`` by handling creation and - transmission of graph samples. - """ - - def __init__( - self, - graph: cugraph_dgl.CuGraphStorage, - indices: torch.Tensor, - graph_sampler: cugraph_dgl.dataloading.NeighborSampler, - sampling_output_dir: str, - batches_per_partition: int = 50, - seeds_per_call: int = 200_000, - device: torch.device = None, - use_ddp: bool = False, - ddp_seed: int = 0, - batch_size: int = 1024, - drop_last: bool = False, - shuffle: bool = False, - sparse_format: str = "coo", - **kwargs, - ): - """ - Constructor for DaskDataLoader: - ------------------------------- - graph : CuGraphStorage - The graph. - indices : Tensor or dict[ntype, Tensor] - The set of indices. It can either be a tensor of - integer indices or a dictionary of types and indices. - The actual meaning of the indices is defined by the :meth:`sample` method of - :attr:`graph_sampler`. - graph_sampler : cugraph_dgl.dataloading.NeighborSampler - The subgraph sampler. 
- sampling_output_dir: str - Output directory to share sampling results in - batches_per_partition: int - The number of batches of sampling results to write/read - seeds_per_call: int - The number of seeds to sample at once - device : device context, optional - The device of the generated MFGs in each iteration, which should be a - PyTorch device object (e.g., ``torch.device``). - By default this returns the tenors on device with the current - cuda context - use_ddp : boolean, optional - If True, tells the DataLoader to split the training set for each - participating process appropriately using - :class:`torch.utils.data.distributed.DistributedSampler`. - Overrides the :attr:`sampler` argument of - :class:`torch.utils.data.DataLoader`. - ddp_seed : int, optional - The seed for shuffling the dataset in - :class:`torch.utils.data.distributed.DistributedSampler`. - Only effective when :attr:`use_ddp` is True. - batch_size: int - Batch size. - sparse_format: str, default = "coo" - The sparse format of the emitted sampled graphs. Choose between "csc" - and "coo". When using "csc", the graphs are of type - cugraph_dgl.nn.SparseGraph. - kwargs : dict - Key-word arguments to be passed to the parent PyTorch - :py:class:`torch.utils.data.DataLoader` class. Common arguments are: - - ``batch_size`` (int): The number of indices in each batch. - - ``drop_last`` (bool): Whether to drop the last incomplete - batch. - - ``shuffle`` (bool): Whether to randomly shuffle the - indices at each epoch - Examples - -------- - To train a 3-layer GNN for node classification on a set of nodes - ``train_nid`` on a homogeneous graph where each node takes messages - from 15 neighbors on the first layer, 10 neighbors on the second, and - 5 neighbors on the third: - >>> sampler = cugraph_dgl.dataloading.NeighborSampler([15, 10, 5]) - >>> dataloader = cugraph_dgl.dataloading.DataLoader( - ... g, train_nid, sampler, - ... 
batch_size=1024, shuffle=True, drop_last=False, num_workers=0) - >>> for input_nodes, output_nodes, blocks in dataloader: - ... train_on(input_nodes, output_nodes, blocks) - **Using with Distributed Data Parallel** - If you are using PyTorch's distributed training (e.g. when using - :mod:`torch.nn.parallel.DistributedDataParallel`), - you can train the model by turning - on the `use_ddp` option: - >>> sampler = cugraph_dgl.dataloading.NeighborSampler([15, 10, 5]) - >>> dataloader = cugraph_dgl.dataloading.DataLoader( - ... g, train_nid, sampler, use_ddp=True, - ... batch_size=1024, shuffle=True, drop_last=False, num_workers=0) - >>> for epoch in range(start_epoch, n_epochs): - ... for input_nodes, output_nodes, blocks in dataloader: - ... - """ - if sparse_format not in ["coo", "csc"]: - raise ValueError( - f"sparse_format must be one of 'coo', 'csc', " - f"but got {sparse_format}." - ) - self.sparse_format = sparse_format - - self.ddp_seed = ddp_seed - self.use_ddp = use_ddp - self.shuffle = shuffle - self.drop_last = drop_last - self.graph_sampler = graph_sampler - worker_init_fn = dgl.dataloading.WorkerInitWrapper( - kwargs.get("worker_init_fn", None) - ) - self.other_storages = {} - self.epoch_number = 0 - self._batch_size = batch_size - self._sampling_output_dir = sampling_output_dir - self._batches_per_partition = batches_per_partition - self._seeds_per_call = seeds_per_call - self._rank = None - - indices = _dgl_idx_to_cugraph_idx(indices, graph) - - self.tensorized_indices_ds = dgl.dataloading.create_tensorized_dataset( - indices, - batch_size, - drop_last, - use_ddp, - ddp_seed, - shuffle, - kwargs.get("persistent_workers", False), - ) - - if len(graph.ntypes) <= 1: - self.cugraph_dgl_dataset = HomogenousBulkSamplerDataset( - total_number_of_nodes=graph.total_number_of_nodes, - edge_dir=self.graph_sampler.edge_dir, - sparse_format=sparse_format, - ) - else: - etype_id_to_etype_str_dict = {v: k for k, v in graph._etype_id_dict.items()} - - 
self.cugraph_dgl_dataset = HeterogenousBulkSamplerDataset( - num_nodes_dict=graph.num_nodes_dict, - etype_id_dict=etype_id_to_etype_str_dict, - etype_offset_dict=graph._etype_offset_d, - ntype_offset_dict=graph._ntype_offset_d, - edge_dir=self.graph_sampler.edge_dir, - ) - - if use_ddp: - rank = torch.distributed.get_rank() - client = default_client() - self._graph_creation_event = Event("cugraph_dgl_load_mg_graph_event") - if rank == 0: - G = create_cugraph_graph_from_edges_dict( - edges_dict=graph._edges_dict, - etype_id_dict=graph._etype_id_dict, - edge_dir=graph_sampler.edge_dir, - ) - client.publish_dataset(cugraph_dgl_mg_graph_ds=G) - self._graph_creation_event.set() - else: - if self._graph_creation_event.wait(timeout=1000): - G = client.get_dataset("cugraph_dgl_mg_graph_ds") - else: - raise RuntimeError( - f"Fetch cugraph_dgl_mg_graph_ds to worker_id {rank}", - "from worker_id 0 failed", - ) - else: - rank = 0 - G = create_cugraph_graph_from_edges_dict( - edges_dict=graph._edges_dict, - etype_id_dict=graph._etype_id_dict, - edge_dir=graph_sampler.edge_dir, - ) - - self._rank = rank - self._cugraph_graph = G - super().__init__( - self.cugraph_dgl_dataset, - batch_size=None, - worker_init_fn=worker_init_fn, - collate_fn=lambda x: x, # Hack to prevent collating - **kwargs, - ) - - def __iter__(self): - output_dir = os.path.join( - self._sampling_output_dir, "epoch_" + str(self.epoch_number) - ) - kwargs = {} - if isinstance(self.cugraph_dgl_dataset, HomogenousBulkSamplerDataset): - kwargs["deduplicate_sources"] = True - kwargs["prior_sources_behavior"] = "carryover" - kwargs["renumber"] = True - - if self.sparse_format == "csc": - kwargs["compression"] = "CSR" - kwargs["compress_per_hop"] = True - # The following kwargs will be deprecated in uniform sampler. 
- kwargs["use_legacy_names"] = False - kwargs["include_hop_column"] = False - - else: - kwargs["deduplicate_sources"] = False - kwargs["prior_sources_behavior"] = None - kwargs["renumber"] = False - - bs = BulkSampler( - output_path=output_dir, - batch_size=self._batch_size, - graph=self._cugraph_graph, - batches_per_partition=self._batches_per_partition, - seeds_per_call=self._seeds_per_call, - fanout_vals=self.graph_sampler._reversed_fanout_vals, - with_replacement=self.graph_sampler.replace, - **kwargs, - ) - - if self.shuffle: - self.tensorized_indices_ds.shuffle() - - batch_df = create_batch_df(self.tensorized_indices_ds) - bs.add_batches(batch_df, start_col_name="start", batch_col_name="batch_id") - bs.flush() - self.cugraph_dgl_dataset.set_input_files(input_directory=output_dir) - self.epoch_number = self.epoch_number + 1 - return super().__iter__() - - def __del__(self): - if self.use_ddp: - torch.distributed.barrier() - if self._rank == 0: - if self.use_ddp: - client = default_client() - client.unpublish_dataset("cugraph_dgl_mg_graph_ds") - self._graph_creation_event.clear() - _clean_directory(self._sampling_output_dir) - - -def get_batch_id_series(n_output_rows: int, batch_size: int) -> cudf.Series: - num_batches = (n_output_rows + batch_size - 1) // batch_size - print(f"Number of batches = {num_batches}".format(num_batches)) - batch_ar = cp.arange(0, num_batches).repeat(batch_size) - batch_ar = batch_ar[0:n_output_rows].astype(cp.int32) - return cudf.Series(batch_ar) - - -def create_batch_df(dataset: torch.Tensor) -> cudf.DataFrame: - batch_id_ls = [] - indices_ls = [] - for batch_id, b_indices in enumerate(dataset): - if isinstance(b_indices, dict): - b_indices = torch.cat(list(b_indices.values())) - batch_id_ar = cp.full(shape=len(b_indices), fill_value=batch_id, dtype=cp.int32) - batch_id_ls.append(batch_id_ar) - indices_ls.append(b_indices) - - batch_id_ar = cp.concatenate(batch_id_ls) - indices_ar = cp.asarray(torch.concat(indices_ls)) - batches_df 
= cudf.DataFrame( - { - "start": indices_ar, - "batch_id": batch_id_ar, - } - ) - return batches_df - - -def _dgl_idx_to_cugraph_idx(idx, cugraph_gs): - if not isinstance(idx, dict): - if len(cugraph_gs.ntypes) > 1: - raise dgl.DGLError( - "Must specify node type when the graph is not homogeneous." - ) - return idx - else: - return {k: cugraph_gs.dgl_n_id_to_cugraph_id(n, k) for k, n in idx.items()} - - -def _clean_directory(path): - """param could either be relative or absolute.""" - if os.path.isfile(path): - os.remove(path) # remove the file - elif os.path.isdir(path): - shutil.rmtree(path) # remove dir and all contains diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py deleted file mode 100644 index 4f36353cb18..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import warnings - -from typing import Union, Optional, Dict - -from cugraph.utilities.utils import import_optional - -import cugraph_dgl -from cugraph_dgl.typing import TensorType -from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor - -dgl = import_optional("dgl") -torch = import_optional("torch") - - -class DataLoader: - """ - Duck-typed version of dgl.dataloading.DataLoader - """ - - def __init__( - self, - graph: "cugraph_dgl.Graph", - indices: TensorType, - graph_sampler: "cugraph_dgl.dataloading.Sampler", - device: Union[int, str, "torch.device"] = None, - use_ddp: bool = False, - ddp_seed: int = 0, - batch_size: int = 1, - drop_last: bool = False, - shuffle: bool = False, - use_prefetch_thread: Optional[bool] = None, - use_alternate_streams: Optional[bool] = None, - pin_prefetcher: Optional[bool] = None, - use_uva=False, - gpu_cache: Dict[str, Dict[str, int]] = None, - output_format: str = "dgl.Block", - **kwargs, - ): - """ - Parameters - ---------- - graph: cugraph_dgl.Graph - The graph being sampled. Can be a single-GPU or multi-GPU graph. - indices: TensorType - The seed nodes for sampling. If use_ddp=True, then all seed - nodes should be provided. If use_ddp=False, then only the seed - nodes assigned to this worker should be provided. - graph_sampler: cugraph_dgl.dataloading.Sampler - The sampler responsible for sampling the graph and producing - output minibatches. - device: Union[int, str, torch.device] - Optional. - The device assigned to this loader ('cpu', 'cuda' or device id). - Defaults to the current device. - use_ddp: bool - Optional (default=False). - If true, this argument will assume the entire list of input seed - nodes is being passed to each worker, and will appropriately - split and shuffle the list. - It false, then it is assumed that the list of input seed nodes - is comprised of the union of the lists provided to each worker. - ddp_seed: int - Optional (default=0). 
- The seed used for dividing and shuffling data if use_ddp=True. - Has no effect if use_ddp=False. - use_uva: bool - Optional (default=False). - Whether to use pinned memory and unified virtual addressing - to perform sampling. - This argument is ignored by cuGraph-DGL. - use_prefetch_thread: bool - Optional (default=False). - Whether to spawn a new thread for feature fetching. - This argument is ignored by cuGraph-DGL. - use_alternate_streams: bool - Optional (default=False). - Whether to perform feature fetching on a separate stream. - This argument is ignored by cuGraph-DGL. - pin_prefetcher: bool - Optional (default=False). - Whether to pin the feature tensors. - This argument is currently ignored by cuGraph-DGL. - gpu_cache: Dict[str, Dict[str, int]] - List of features to cache using HugeCTR. - This argument is not supported by cuGraph-DGL and - will result in an error. - output_format: str - Optional (default="dgl.Block"). - The output format for blocks. - Can be either "dgl.Block" or "cugraph_dgl.nn.SparseGraph". - """ - - if use_uva: - warnings.warn("The 'use_uva' argument is ignored by cuGraph-DGL.") - if use_prefetch_thread: - warnings.warn( - "The 'use_prefetch_thread' argument is ignored by cuGraph-DGL." - ) - if use_alternate_streams: - warnings.warn( - "The 'use_alternate_streams' argument is ignored by cuGraph-DGL." - ) - if pin_prefetcher: - warnings.warn("The 'pin_prefetcher' argument is ignored by cuGraph-DGL.") - if gpu_cache: - raise ValueError( - "HugeCTR is not supported by cuGraph-DGL. " - "Consider using WholeGraph for feature storage" - " in cugraph_dgl.Graph instead." 
- ) - - indices = _cast_to_torch_tensor(indices) - - self.__dataset = dgl.dataloading.create_tensorized_dataset( - indices, - batch_size, - drop_last, - use_ddp, - ddp_seed, - shuffle, - kwargs.get("persistent_workers", False), - ) - - self.__output_format = output_format - self.__sampler = graph_sampler - self.__batch_size = batch_size - self.__graph = graph - self.__device = device - - @property - def _batch_size(self): - return self.__batch_size - - @property - def dataset( - self, - ) -> Union[ - "dgl.dataloading.dataloader.TensorizedDataset", - "dgl.dataloading.dataloader.DDPTensorizedDataset", - ]: - return self.__dataset - - def __iter__(self): - # TODO move to the correct device (rapidsai/cugraph-gnn#11) - return self.__sampler.sample( - self.__graph, - self.__dataset, - batch_size=self.__batch_size, - ) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataset.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataset.py deleted file mode 100644 index f6fe38fe9f8..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataset.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import annotations -from typing import Tuple, Dict, Optional, List, Union - -import os -import cudf -from cugraph.utilities.utils import import_optional -from cugraph_dgl.dataloading.utils.sampling_helpers import ( - create_homogeneous_sampled_graphs_from_dataframe, - create_heterogeneous_sampled_graphs_from_dataframe, - create_homogeneous_sampled_graphs_from_dataframe_csc, -) - - -dgl = import_optional("dgl") -torch = import_optional("torch") - - -# Todo: maybe should switch to __iter__ -class HomogenousBulkSamplerDataset(torch.utils.data.Dataset): - def __init__( - self, - total_number_of_nodes: int, - edge_dir: str, - return_type: str = "dgl.Block", - sparse_format: str = "coo", - ): - if return_type not in ["dgl.Block", "cugraph_dgl.nn.SparseGraph"]: - raise ValueError( - "return_type must be either 'dgl.Block' or " - "'cugraph_dgl.nn.SparseGraph'." - ) - # TODO: Deprecate `total_number_of_nodes` - # as it is no longer needed - # in the next release - self.total_number_of_nodes = total_number_of_nodes - self.edge_dir = edge_dir - self.sparse_format = sparse_format - self._current_batch_fn = None - self._input_files = None - self._return_type = return_type - - def __len__(self): - return self.num_batches - - def __getitem__(self, idx: int): - if self._input_files is None: - raise dgl.DGLError( - "Please set input files by calling `set_input_files` " - "before trying to fetch a sample" - ) - - fn, batch_offset = self._batch_to_fn_d[idx] - if fn != self._current_batch_fn: - # Remove current batches to free up memory - # before loading new batches - if hasattr(self, "_current_batches"): - del self._current_batches - if self.sparse_format == "csc": - df = _load_sampled_file(dataset_obj=self, fn=fn, skip_rename=True) - self._current_batches = ( - create_homogeneous_sampled_graphs_from_dataframe_csc(df) - ) - else: - df = _load_sampled_file(dataset_obj=self, fn=fn) - self._current_batches = ( - create_homogeneous_sampled_graphs_from_dataframe( - 
sampled_df=df, - edge_dir=self.edge_dir, - return_type=self._return_type, - ) - ) - current_offset = idx - batch_offset - return self._current_batches[current_offset] - - def set_input_files( - self, - input_directory: Optional[str] = None, - input_file_paths: Optional[List[str]] = None, - ): - """ - Set input files that have been created by the `cugraph.gnn.BulkSampler` - Parameters - ---------- - input_directory: str - input_directory which contains all the files that will be - loaded by HomogenousBulkSamplerDataset - input_file_paths: List[str] - File paths that will be loaded by the HomogenousBulkSamplerDataset - """ - _set_input_files( - self, input_directory=input_directory, input_file_paths=input_file_paths - ) - - -class HeterogenousBulkSamplerDataset(torch.utils.data.Dataset): - def __init__( - self, - num_nodes_dict: Dict[str, int], - etype_id_dict: Dict[int, Tuple[str, str, str]], - etype_offset_dict: Dict[Tuple[str, str, str], int], - ntype_offset_dict: Dict[str, int], - edge_dir: str = "in", - ): - self.num_nodes_dict = num_nodes_dict - self.etype_id_dict = etype_id_dict - self.etype_offset_dict = etype_offset_dict - self.ntype_offset_dict = ntype_offset_dict - self.edge_dir = edge_dir - self._current_batch_fn = None - self._input_files = None - - def __len__(self): - return self.num_batches - - def __getitem__(self, idx): - if self._input_files is None: - raise dgl.DGLError( - "Please set input files by calling `set_input_files` " - "before trying to fetch a sample" - ) - - fn, batch_offset = self._batch_to_fn_d[idx] - if fn != self._current_batch_fn: - df = _load_sampled_file(dataset_obj=self, fn=fn) - self._current_batches = create_heterogeneous_sampled_graphs_from_dataframe( - sampled_df=df, - num_nodes_dict=self.num_nodes_dict, - etype_id_dict=self.etype_id_dict, - etype_offset_dict=self.etype_offset_dict, - ntype_offset_dict=self.ntype_offset_dict, - edge_dir=self.edge_dir, - ) - del df - - current_offset = idx - batch_offset - return 
self._current_batches[current_offset] - - def set_input_files( - self, - input_directory: Optional[str] = None, - input_file_paths: Optional[List[str]] = None, - ): - """ - Set input files that have been created by the `cugraph.gnn.BulkSampler` - Parameters - ---------- - input_directory: str - input_directory which contains all the files that will be - loaded by HeterogenousBulkSamplerDataset - input_file_paths: List[str] - File names that will be loaded by the HeterogenousBulkSamplerDataset - """ - _set_input_files( - self, input_directory=input_directory, input_file_paths=input_file_paths - ) - - -def _load_sampled_file(dataset_obj, fn, skip_rename=False): - df = cudf.read_parquet(os.path.join(fn)) - if dataset_obj.edge_dir == "in" and not skip_rename: - df.rename( - columns={"sources": "destinations", "destinations": "sources"}, - inplace=True, - ) - dataset_obj._current_batch_fn = fn - return df - - -def get_batch_start_end(fn): - batch_str = fn.split("batch=")[1] - batch_start, batch_end = batch_str.split("-") - batch_end = batch_end.split(".parquet")[0] - return int(batch_start), int(batch_end) - - -def get_batch_to_fn_d(files): - batch_to_fn_d = {} - batch_id = 0 - for fn in files: - start, end = get_batch_start_end(fn) - batch_offset = batch_id - for _ in range(start, end + 1): - batch_to_fn_d[batch_id] = fn, batch_offset - batch_id += 1 - return batch_to_fn_d - - -def _set_input_files( - dataset_obj: Union[HomogenousBulkSamplerDataset, HeterogenousBulkSamplerDataset], - input_directory: Optional[str] = None, - input_file_paths: Optional[List[str]] = None, -) -> None: - - if input_directory is None and input_file_paths is None: - raise ValueError("input_files or input_file_paths must be set") - - if (input_directory is not None) and (input_file_paths is not None): - raise ValueError("Only one of input_directory or input_file_paths must be set") - - if input_file_paths: - dataset_obj._input_files = input_file_paths - if input_directory: - 
dataset_obj._input_files = [fp.path for fp in os.scandir(input_directory)] - dataset_obj._batch_to_fn_d = get_batch_to_fn_d(dataset_obj._input_files) - dataset_obj.num_batches = len(dataset_obj._batch_to_fn_d) - dataset_obj._current_batch_fn = None diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py deleted file mode 100644 index ecc51006995..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import annotations - -import warnings -import tempfile - -from typing import Sequence, Optional, Union, List, Tuple, Iterator - -from cugraph.gnn import UniformNeighborSampler, BiasedNeighborSampler, DistSampleWriter -from cugraph.utilities.utils import import_optional - -import cugraph_dgl -from cugraph_dgl.typing import DGLSamplerOutput -from cugraph_dgl.dataloading.sampler import Sampler, HomogeneousSampleReader - -torch = import_optional("torch") - - -class NeighborSampler(Sampler): - """Sampler that builds computational dependency of node representations via - neighbor sampling for multilayer GNN. - This sampler will make every node gather messages from a fixed number of neighbors - per edge type. The neighbors are picked uniformly. 
- Parameters - ---------- - fanouts_per_layer : int - List of neighbors to sample for each GNN layer, with the i-th - element being the fanout for the i-th GNN layer. - If -1 is provided then all inbound/outbound edges - of that edge type will be included. - edge_dir : str, default ``'in'`` - Can be either ``'in' `` where the neighbors will be sampled according to - incoming edges, or ``'out'`` for outgoing edges - replace : bool, default False - Whether to sample with replacement - Examples - -------- - **Node classification** - To train a 3-layer GNN for node classification on a set of nodes ``train_nid`` on - a homogeneous graph where each node takes messages from 5, 10, 15 neighbors for - the first, second, and third layer respectively (assuming the backend is PyTorch): - >>> sampler = cugraph_dgl.dataloading.NeighborSampler([5, 10, 15]) - >>> dataloader = cugraph_dgl.dataloading.DataLoader( - ... g, train_nid, sampler, - ... batch_size=1024, shuffle=True) - >>> for input_nodes, output_nodes, blocks in dataloader: - ... train_on(blocks) - """ - - def __init__( - self, - fanouts_per_layer: Sequence[int], - edge_dir: str = "in", - replace: bool = False, - prob: Optional[str] = None, - mask: Optional[str] = None, - prefetch_node_feats: Optional[Union[List[str], dict[str, List[str]]]] = None, - prefetch_edge_feats: Optional[ - Union[List[str], dict[Tuple[str, str, str], List[str]]] - ] = None, - prefetch_labels: Optional[Union[List[str], dict[str, List[str]]]] = None, - output_device: Optional[Union["torch.device", int, str]] = None, - fused: Optional[bool] = None, - sparse_format="csc", - output_format="dgl.Block", - **kwargs, - ): - """ - Parameters - ---------- - fanouts_per_layer: Sequence[int] - The number of neighbors to sample per layer. - edge_dir: str - Optional (default='in'). - The direction to traverse edges. - replace: bool - Optional (default=False). - Whether to sample with replacement. - prob: str - Optional. 
- If provided, the probability of each neighbor being - sampled is proportional to the edge feature - with the given name. Mutually exclusive with mask. - mask: str - Optional. - If proivided, only neighbors where the edge mask - with the given name is True can be selected. - Mutually exclusive with prob. - Currently unsupported. - prefetch_node_feats: Union[List[str], dict[str, List[str]]] - Optional. - Currently ignored by cuGraph-DGL. - prefetch_edge_feats: Union[List[str], dict[Tuple[str, str, str], List[str]]] - Optional. - Currently ignored by cuGraph-DGL. - prefetch_labels: Union[List[str], dict[str, List[str]]] - Optional. - Currently ignored by cuGraph-DGL. - output_device: Union[torch.device, int, str] - Optional. - Output device for samples. Defaults to the current device. - fused: bool - Optional. - This argument is ignored by cuGraph-DGL. - sparse_format: str - Optional (default = "coo"). - The sparse format of the emitted sampled graphs. - Currently, only "csc" is supported. - output_format: str - Optional (default = "dgl.Block") - The output format of the emitted sampled graphs. - Can be either "dgl.Block" (default), or "cugraph_dgl.nn.SparseGraph". - **kwargs - Keyword arguments for the underlying cuGraph distributed sampler - and writer (directory, batches_per_partition, format, - local_seeds_per_call). 
- """ - - if mask: - raise NotImplementedError( - "Edge masking is currently unsupported by cuGraph-DGL" - ) - if prefetch_edge_feats: - warnings.warn("'prefetch_edge_feats' is ignored by cuGraph-DGL") - if prefetch_node_feats: - warnings.warn("'prefetch_node_feats' is ignored by cuGraph-DGL") - if prefetch_labels: - warnings.warn("'prefetch_labels' is ignored by cuGraph-DGL") - if fused: - warnings.warn("'fused' is ignored by cuGraph-DGL") - - self.__prob_attr = prob - - self.fanouts = fanouts_per_layer - reverse_fanouts = fanouts_per_layer.copy() - reverse_fanouts.reverse() - self._reversed_fanout_vals = reverse_fanouts - - self.edge_dir = edge_dir - self.replace = replace - self.__kwargs = kwargs - - super().__init__( - sparse_format=sparse_format, - output_format=output_format, - ) - - def sample( - self, - g: "cugraph_dgl.Graph", - indices: Iterator["torch.Tensor"], - batch_size: int = 1, - ) -> Iterator[DGLSamplerOutput]: - kwargs = dict(**self.__kwargs) - - directory = kwargs.pop("directory", None) - if directory is None: - warnings.warn("Setting a directory to store samples is recommended.") - self._tempdir = tempfile.TemporaryDirectory() - directory = self._tempdir.name - - writer = DistSampleWriter( - directory=directory, - batches_per_partition=kwargs.pop("batches_per_partition", 256), - format=kwargs.pop("format", "parquet"), - ) - - sampling_clx = ( - UniformNeighborSampler - if self.__prob_attr is None - else BiasedNeighborSampler - ) - - ds = sampling_clx( - g._graph(self.edge_dir, prob_attr=self.__prob_attr), - writer, - compression="CSR", - fanout=self._reversed_fanout_vals, - prior_sources_behavior="carryover", - deduplicate_sources=True, - compress_per_hop=True, - with_replacement=self.replace, - **kwargs, - ) - - if g.is_homogeneous: - indices = torch.concat(list(indices)) - reader = ds.sample_from_nodes(indices.long(), batch_size=batch_size) - return HomogeneousSampleReader(reader, self.output_format, self.edge_dir) - - raise ValueError( - 
"Sampling heterogeneous graphs is currently" - " unsupported in the non-dask API" - ) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py deleted file mode 100644 index 7ea608e7e53..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Iterator, Dict, Tuple, List, Union - -import cugraph_dgl -from cugraph_dgl.nn import SparseGraph -from cugraph_dgl.typing import DGLSamplerOutput -from cugraph_dgl.dataloading.utils.sampling_helpers import ( - create_homogeneous_sampled_graphs_from_tensors_csc, -) - - -from cugraph.utilities.utils import import_optional - -torch = import_optional("torch") -dgl = import_optional("dgl") - - -class SampleReader: - """ - Iterator that processes results from the cuGraph distributed sampler. - """ - - def __init__( - self, - base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]], - output_format: str = "dgl.Block", - ): - """ - Constructs a new SampleReader. - - Parameters - ---------- - base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]] - The iterator responsible for loading saved samples produced by - the cuGraph distributed sampler. 
- """ - self.__output_format = output_format - self.__base_reader = base_reader - self.__num_samples_remaining = 0 - self.__index = 0 - - @property - def output_format(self) -> str: - return self.__output_format - - def __next__(self) -> DGLSamplerOutput: - if self.__num_samples_remaining == 0: - # raw_sample_data is already a dict of tensors - self.__raw_sample_data, start_inclusive, end_inclusive = next( - self.__base_reader - ) - - self.__decoded_samples = self._decode_all(self.__raw_sample_data) - self.__num_samples_remaining = end_inclusive - start_inclusive + 1 - self.__index = 0 - - out = self.__decoded_samples[self.__index] - self.__index += 1 - self.__num_samples_remaining -= 1 - return out - - def _decode_all(self) -> List[DGLSamplerOutput]: - raise NotImplementedError("Must be implemented by subclass") - - def __iter__(self) -> DGLSamplerOutput: - return self - - -class HomogeneousSampleReader(SampleReader): - """ - Subclass of SampleReader that reads DGL homogeneous output samples - produced by the cuGraph distributed sampler. - """ - - def __init__( - self, - base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]], - output_format: str = "dgl.Block", - edge_dir="in", - ): - """ - Constructs a new HomogeneousSampleReader - - Parameters - ---------- - base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]] - The reader responsible for loading saved samples produced by - the cuGraph distributed sampler. - output_format: str - The output format for blocks (either "dgl.Block" or - "cugraph_dgl.nn.SparseGraph"). - edge_dir: str - The direction sampling was performed in ("in" or "out"). 
- """ - - self.__edge_dir = edge_dir - super().__init__(base_reader, output_format=output_format) - - def __decode_csc( - self, raw_sample_data: Dict[str, "torch.Tensor"] - ) -> List[DGLSamplerOutput]: - return create_homogeneous_sampled_graphs_from_tensors_csc( - raw_sample_data, output_format=self.output_format - ) - - def __decode_coo( - self, raw_sample_data: Dict[str, "torch.Tensor"] - ) -> List[DGLSamplerOutput]: - raise NotImplementedError( - "COO format is currently unsupported in the non-dask API" - ) - - def _decode_all( - self, raw_sample_data: Dict[str, "torch.Tensor"] - ) -> List[DGLSamplerOutput]: - if "major_offsets" in raw_sample_data: - return self.__decode_csc(raw_sample_data) - else: - return self.__decode_coo(raw_sample_data) - - -class Sampler: - """ - Base sampler class for all cugraph-DGL samplers. - """ - - def __init__(self, sparse_format: str = "csc", output_format="dgl.Block"): - """ - Parameters - ---------- - sparse_format: str - Optional (default = "coo"). - The sparse format of the emitted sampled graphs. - Currently, only "csc" is supported. - output_format: str - Optional (default = "dgl.Block") - The output format of the emitted sampled graphs. - Can be either "dgl.Block" (default), or "cugraph_dgl.nn.SparseGraph". - """ - - if sparse_format != "csc": - raise ValueError("Only CSC format is supported at this time") - - self.__output_format = output_format - - @property - def output_format(self): - return self.__output_format - - @property - def sparse_format(self): - return self.__sparse_format - - def sample( - self, - g: cugraph_dgl.Graph, - indices: Iterator["torch.Tensor"], - batch_size: int = 1, - ) -> Iterator[ - Tuple["torch.Tensor", "torch.Tensor", List[Union[SparseGraph, "dgl.Block"]]] - ]: - """ - Samples the graph. - - Parameters - ---------- - g: cugraph_dgl.Graph - The graph being sampled. - indices: TensorType - The node ids of seed nodes where sampling will initiate from. 
- batch_size: int - The number of seed nodes per batch. - - Returns - ------- - Iterator[DGLSamplerOutput] - Iterator over batches. The returned tuples are in standard - DGL format: (input nodes, output nodes, blocks) where input - nodes are the renumbered input nodes, output nodes are - the renumbered output nodes, and blocks are the output graphs - for each hop. - """ - - raise NotImplementedError("Must be implemented by subclass") diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/__init__.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/__init__.py deleted file mode 100644 index a1dd01f33d4..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/extract_graph_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/extract_graph_helpers.py deleted file mode 100644 index 0d3d5823097..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/extract_graph_helpers.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import annotations -from typing import Tuple, Dict, Union -import cugraph -import cudf -import dask_cudf -import numpy as np - - -def create_cugraph_graph_from_edges_dict( - edges_dict: Dict[Tuple(str, str, str), Union[dask_cudf.DataFrame, cudf.DataFrame]], - etype_id_dict: Dict[Dict[Tuple(str, str, str)] : int], - edge_dir: str, -): - if edge_dir == "in": - edges_dict = {k: reverse_edges(df) for k, df in edges_dict.items()} - if len(edges_dict) > 1: - has_multiple_etypes = True - edges_dict = { - k: add_etype_id(df, etype_id_dict[k]) for k, df in edges_dict.items() - } - else: - has_multiple_etypes = False - - edges_dfs = list(edges_dict.values()) - del edges_dict - if isinstance(edges_dfs[0], dask_cudf.DataFrame): - edges_df = dask_cudf.concat(edges_dfs, ignore_index=True) - else: - edges_df = cudf.concat(edges_dfs, ignore_index=True) - del edges_dfs - - G = cugraph.MultiGraph(directed=True) - if isinstance(edges_df, dask_cudf.DataFrame): - g_creation_f = G.from_dask_cudf_edgelist - else: - g_creation_f = G.from_cudf_edgelist - - if has_multiple_etypes: - edge_etp = "etp" - else: - edge_etp = None - - g_creation_f( - edges_df, - source="_SRC_", - destination="_DST_", - weight=None, - edge_id="_EDGE_ID_", - edge_type=edge_etp, - renumber=True, - ) - return G - - -def reverse_edges(df: Union[dask_cudf.DataFrame, cudf.DataFrame]): - return df.rename(columns={"_SRC_": "_DST_", "_DST_": "_SRC_"}) - - -def add_etype_id(df: Union[dask_cudf.DataFrame, cudf.DataFrame], etype_id: int): - df["etp"] = np.int32(etype_id) - return df diff --git 
a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py deleted file mode 100644 index 3b7e4502134..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py +++ /dev/null @@ -1,692 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import annotations -from typing import List, Tuple, Dict, Optional -from collections import defaultdict -import cudf -from cugraph.utilities.utils import import_optional -from cugraph_dgl.nn import SparseGraph - -dgl = import_optional("dgl") -torch = import_optional("torch") -cugraph_dgl = import_optional("cugraph_dgl") - - -def cast_to_tensor(ser: cudf.Series): - if len(ser) == 0: - # Empty series can not be converted to pytorch cuda tensor - t = torch.from_numpy(ser.values.get()) - return t.to("cuda") - - return torch.as_tensor(ser.values, device="cuda") - - -def _split_tensor(t, split_indices): - """ - Split a tensor into a list of tensors based on split_indices. 
- """ - # TODO: Switch to something below - # return [t[i:j] for i, j in zip(split_indices[:-1], split_indices[1:])] - if split_indices.device.type != "cpu": - split_indices = split_indices.to("cpu") - return torch.tensor_split(t, split_indices) - - -def _get_source_destination_range(sampled_df): - o = sampled_df.groupby(["batch_id", "hop_id"], as_index=True).agg( - {"sources": "max", "destinations": "max"} - ) - o.rename( - columns={"sources": "sources_range", "destinations": "destinations_range"}, - inplace=True, - ) - d = o.to_dict(orient="index") - return d - - -def _create_split_dict(tensor): - min_value = tensor.min() - max_value = tensor.max() - indices = torch.arange( - start=min_value + 1, - end=max_value + 1, - device=tensor.device, - ) - split_dict = {i: {} for i in range(min_value, max_value + 1)} - return split_dict, indices - - -def _get_renumber_map(df): - map = df["map"] - df.drop(columns=["map"], inplace=True) - - map_starting_offset = map.iloc[0] - renumber_map = map[map_starting_offset:].dropna().reset_index(drop=True) - renumber_map_batch_indices = map[1 : map_starting_offset - 1].reset_index(drop=True) - renumber_map_batch_indices = renumber_map_batch_indices - map_starting_offset - - map_end_offset = map_starting_offset + len(renumber_map) - # We only need to drop rows if the length of dataframe is determined by the map - # that is if map_length > sampled edges length - if map_end_offset == len(df): - df.dropna(axis=0, how="all", inplace=True) - df.reset_index(drop=True, inplace=True) - - return df, cast_to_tensor(renumber_map), cast_to_tensor(renumber_map_batch_indices) - - -def _get_tensor_d_from_sampled_df(df): - """ - Converts a sampled cuDF DataFrame into a list of tensors. - - Args: - df (cudf.DataFrame): The sampled cuDF DataFrame containing columns - Returns: - dict: A dictionary of tensors, keyed by batch_id and hop_id. 
- """ - range_d = _get_source_destination_range(df) - df, renumber_map, renumber_map_batch_indices = _get_renumber_map(df) - batch_id_tensor = cast_to_tensor(df["batch_id"]) - split_d, batch_indices = _create_split_dict(batch_id_tensor) - batch_split_indices = torch.searchsorted(batch_id_tensor, batch_indices).to("cpu") - - for column in df.columns: - if column != "batch_id": - t = cast_to_tensor(df[column]) - split_t = _split_tensor(t, batch_split_indices) - for bid, batch_t in zip(split_d.keys(), split_t): - split_d[bid][column] = batch_t - - split_t = _split_tensor(renumber_map, renumber_map_batch_indices) - for bid, batch_t in zip(split_d.keys(), split_t): - split_d[bid]["map"] = batch_t - del df - result_tensor_d = {} - # Cache hop_split_d, hop_indices - hop_split_empty_d, hop_indices = None, None - for batch_id, batch_d in split_d.items(): - hop_id_tensor = batch_d["hop_id"] - if hop_split_empty_d is None: - hop_split_empty_d, hop_indices = _create_split_dict(hop_id_tensor) - - hop_split_d = {k: {} for k in hop_split_empty_d.keys()} - hop_split_indices = torch.searchsorted(hop_id_tensor, hop_indices).to("cpu") - for column, t in batch_d.items(): - if column not in ["hop_id", "map"]: - split_t = _split_tensor(t, hop_split_indices) - for hid, ht in zip(hop_split_d.keys(), split_t): - hop_split_d[hid][column] = ht - for hid in hop_split_d.keys(): - hop_split_d[hid]["sources_range"] = range_d[(batch_id, hid)][ - "sources_range" - ] - hop_split_d[hid]["destinations_range"] = range_d[(batch_id, hid)][ - "destinations_range" - ] - - result_tensor_d[batch_id] = hop_split_d - result_tensor_d[batch_id]["map"] = batch_d["map"] - return result_tensor_d - - -def create_homogeneous_sampled_graphs_from_dataframe( - sampled_df: cudf.DataFrame, - edge_dir: str = "in", - return_type: str = "dgl.Block", -): - """ - This helper function creates DGL MFGS for - homogeneous graphs from cugraph sampled dataframe - - Args: - sampled_df (cudf.DataFrame): The sampled cuDF DataFrame 
containing - columns `sources`, `destinations`, `edge_id`, `batch_id` and - `hop_id`. - edge_dir (str): Direction of edges from samples - Returns: - list: A list containing three elements: - - input_nodes: The input nodes for the batch. - - output_nodes: The output nodes for the batch. - - graph_per_hop_ls: A list of DGL MFGS for each hop. - """ - if return_type not in ["dgl.Block", "cugraph_dgl.nn.SparseGraph"]: - raise ValueError( - "return_type must be either dgl.Block or cugraph_dgl.nn.SparseGraph" - ) - - result_tensor_d = _get_tensor_d_from_sampled_df(sampled_df) - del sampled_df - result_mfgs = [ - _create_homogeneous_sampled_graphs_from_tensors_perhop( - tensors_batch_d, edge_dir, return_type - ) - for tensors_batch_d in result_tensor_d.values() - ] - del result_tensor_d - return result_mfgs - - -def _create_homogeneous_sampled_graphs_from_tensors_perhop( - tensors_batch_d, edge_dir, return_type -): - """ - This helper function creates sampled DGL MFGS for - homogeneous graphs from tensors per hop for a single - batch - Args: - tensors_batch_d (dict): A dictionary of tensors, keyed by hop_id. - edge_dir (str): Direction of edges from samples - metagraph (dgl.metagraph): The metagraph for the sampled graph - return_type (str): The type of graph to return - Returns: - tuple: A tuple of three elements: - - input_nodes: The input nodes for the batch. - - output_nodes: The output nodes for the batch. - - graph_per_hop_ls: A list of MFGS for each hop. 
- """ - if edge_dir not in ["in", "out"]: - raise ValueError(f"Invalid edge_dir {edge_dir} provided") - if edge_dir == "out": - raise ValueError("Outwards edges not supported yet") - graph_per_hop_ls = [] - seednodes_range = None - for hop_id, tensor_per_hop_d in tensors_batch_d.items(): - if hop_id != "map": - if return_type == "dgl.Block": - mfg = _create_homogeneous_dgl_block_from_tensor_d( - tensor_d=tensor_per_hop_d, - renumber_map=tensors_batch_d["map"], - seednodes_range=seednodes_range, - ) - elif return_type == "cugraph_dgl.nn.SparseGraph": - mfg = _create_homogeneous_cugraph_dgl_nn_sparse_graph( - tensor_d=tensor_per_hop_d, seednodes_range=seednodes_range - ) - else: - raise ValueError(f"Invalid return_type {return_type} provided") - seednodes_range = max( - tensor_per_hop_d["sources_range"], - tensor_per_hop_d["destinations_range"], - ) - graph_per_hop_ls.append(mfg) - - # default DGL behavior - if edge_dir == "in": - graph_per_hop_ls.reverse() - if return_type == "dgl.Block": - input_nodes = graph_per_hop_ls[0].srcdata[dgl.NID] - output_nodes = graph_per_hop_ls[-1].dstdata[dgl.NID] - else: - map = tensors_batch_d["map"] - input_nodes = map[0 : graph_per_hop_ls[0].num_src_nodes()] - output_nodes = map[0 : graph_per_hop_ls[-1].num_dst_nodes()] - return input_nodes, output_nodes, graph_per_hop_ls - - -def _create_homogeneous_dgl_block_from_tensor_d( - tensor_d, - renumber_map, - seednodes_range=None, -): - rs = tensor_d["sources"] - rd = tensor_d["destinations"] - max_src_nodes = tensor_d["sources_range"] - max_dst_nodes = tensor_d["destinations_range"] - if seednodes_range is not None: - # If we have vertices without outgoing edges, then - # sources can be missing from seednodes - # so we add them - # to ensure all the blocks are - # lined up correctly - max_dst_nodes = max(max_dst_nodes, seednodes_range) - - data_dict = {("_N", "_E", "_N"): (rs, rd)} - num_src_nodes = {"_N": max_src_nodes + 1} - num_dst_nodes = {"_N": max_dst_nodes + 1} - - block = 
dgl.create_block( - data_dict=data_dict, num_src_nodes=num_src_nodes, num_dst_nodes=num_dst_nodes - ) - if "edge_id" in tensor_d: - block.edata[dgl.EID] = tensor_d["edge_id"] - # Below adds run time overhead - block.srcdata[dgl.NID] = renumber_map[0 : max_src_nodes + 1] - block.dstdata[dgl.NID] = renumber_map[0 : max_dst_nodes + 1] - return block - - -def _create_homogeneous_cugraph_dgl_nn_sparse_graph(tensor_d, seednodes_range): - max_src_nodes = tensor_d["sources_range"] - max_dst_nodes = tensor_d["destinations_range"] - if seednodes_range is not None: - max_dst_nodes = max(max_dst_nodes, seednodes_range) - size = (max_src_nodes + 1, max_dst_nodes + 1) - sparse_graph = cugraph_dgl.nn.SparseGraph( - size=size, - src_ids=tensor_d["sources"], - dst_ids=tensor_d["destinations"], - formats=["csc"], - reduce_memory=True, - ) - return sparse_graph - - -def create_heterogeneous_sampled_graphs_from_dataframe( - sampled_df: cudf.DataFrame, - num_nodes_dict: Dict[str, int], - etype_id_dict: Dict[int, Tuple[str, str, str]], - etype_offset_dict: Dict[Tuple[str, str, str], int], - ntype_offset_dict: Dict[str, int], - edge_dir: str = "in", -): - """ - This helper function creates DGL MFGS from cugraph sampled dataframe - """ - sampled_df["batch_id"] = sampled_df["batch_id"] - sampled_df["batch_id"].min() - result_df_ls = sampled_df[ - ["sources", "destinations", "edge_id", "hop_id", "edge_type"] - ].scatter_by_map(sampled_df["batch_id"], keep_index=False) - del sampled_df - - result_df_ls = [ - batch_df[["sources", "destinations", "edge_id", "edge_type"]].scatter_by_map( - batch_df["hop_id"], keep_index=False - ) - for batch_df in result_df_ls - ] - - result_tensor_ls = [ - [ - _get_edges_dict_from_perhop_df( - h_df, etype_id_dict, etype_offset_dict, ntype_offset_dict - ) - for h_df in per_batch_ls - ] - for per_batch_ls in result_df_ls - ] - del result_df_ls - - result_mfgs = [ - _create_heterogenous_sampled_graphs_from_tensors_perhop( - tensors_perhop_ls, num_nodes_dict, 
edge_dir - ) - for tensors_perhop_ls in result_tensor_ls - ] - return result_mfgs - - -def _get_edges_dict_from_perhop_df( - df, etype_id_dict, etype_offset_dict, ntype_offset_dict -): - # Optimize below function - # based on _get_tensor_ls_from_sampled_df - edges_per_type_ls = df[["sources", "destinations", "edge_id"]].scatter_by_map( - df["edge_type"], map_size=len(etype_id_dict), keep_index=False - ) - del df - per_type_df_d = {etype_id_dict[i]: df for i, df in enumerate(edges_per_type_ls)} - del edges_per_type_ls - # reverse src,dst here - per_type_tensor_d = { - etype: ( - cast_to_tensor(etype_df["sources"]) - ntype_offset_dict[etype[0]], - cast_to_tensor(etype_df["destinations"]) - ntype_offset_dict[etype[2]], - cast_to_tensor(etype_df["edge_id"]) - etype_offset_dict[etype], - ) - for etype, etype_df in per_type_df_d.items() - } - return per_type_tensor_d - - -def _create_heterogenous_sampled_graphs_from_tensors_perhop( - tensors_perhop_ls, num_nodes_dict, edge_dir -): - if edge_dir not in ["in", "out"]: - raise ValueError(f"Invalid edge_dir {edge_dir} provided") - if edge_dir == "out": - raise ValueError("Outwards edges not supported yet") - graph_per_hop_ls = [] - output_nodes = None - - seed_nodes = None - for hop_edges_dict in tensors_perhop_ls: - block = create_heterogenous_dgl_block_from_tensors_dict( - hop_edges_dict, num_nodes_dict, seed_nodes - ) - seed_nodes = block.srcdata[dgl.NID] - if output_nodes is None: - output_nodes = block.dstdata[dgl.NID] - graph_per_hop_ls.append(block) - - # default DGL behavior - if edge_dir == "in": - graph_per_hop_ls.reverse() - return seed_nodes, output_nodes, graph_per_hop_ls - - -def create_heterogenous_dgl_block_from_tensors_dict( - edges_dict: Dict[Tuple(str, str, str), (torch.Tensor, torch.Tensor, torch.Tensor)], - num_nodes_dict: Dict[str, torch.Tensor], - seed_nodes: Optional[Dict[str, torch.Tensor]], -): - data_dict = {k: (s, d) for k, (s, d, _) in edges_dict.items()} - edge_ids_dict = {k: eid for k, (_, _, 
eid) in edges_dict.items()} - - sampled_graph = dgl.heterograph( - data_dict=data_dict, - num_nodes_dict=num_nodes_dict, - ) - sampled_graph.edata[dgl.EID] = edge_ids_dict - - src_d = defaultdict(list) - dst_d = defaultdict(list) - - for (s, _, d), (src_id, dst_id) in data_dict.items(): - src_d[s].append(src_id) - dst_d[d].append(dst_id) - - src_d = {k: torch.cat(v).unique() for k, v in src_d.items() if len(v) > 0} - if seed_nodes is None: - seed_nodes = {k: torch.cat(v).unique() for k, v in dst_d.items() if len(v) > 0} - - block = dgl.to_block(sampled_graph, dst_nodes=seed_nodes, src_nodes=src_d) - block.edata[dgl.EID] = sampled_graph.edata[dgl.EID] - return block - - -def _process_sampled_tensors_csc( - tensors: Dict["torch.Tensor"], - reverse_hop_id: bool = True, -) -> Tuple[ - Dict[int, Dict[int, Dict[str, "torch.Tensor"]]], - List["torch.Tensor"], - List[List[int, int]], -]: - """ - Convert tensors generated by BulkSampler to a dictionary of tensors, to - facilitate MFG creation. The sampled graphs in the dataframe use CSC-format. - - Parameters - ---------- - tensors: Dict[torch.Tensor] - The output from BulkSampler compressed in CSC format. The dataframe - should be generated with `compression="CSR"` in BulkSampler, - since the sampling routine treats seed nodes as sources. - - reverse_hop_id: bool (default=True) - Reverse hop id. - - Returns - ------- - tensors_dict: dict - A nested dictionary keyed by batch id and hop id. - `tensor_dict[batch_id][hop_id]` holds "minors" and "major_offsets" - values for CSC MFGs. - - renumber_map_list: list - List of renumbering maps for looking up global indices of nodes. One - map for each batch. - - mfg_sizes: list - List of the number of nodes in each message passing layer. For the - k-th hop, mfg_sizes[k] and mfg_sizes[k+1] is the number of sources and - destinations, respectively. 
- """ - - major_offsets = tensors["major_offsets"] - minors = tensors["minors"] - label_hop_offsets = tensors["label_hop_offsets"] - renumber_map = tensors["map"] - renumber_map_offsets = tensors["renumber_map_offsets"] - - n_batches = len(renumber_map_offsets) - 1 - n_hops = int((len(label_hop_offsets) - 1) / n_batches) - - # make global offsets local - # Have to make a clone as pytorch does not allow - # in-place operations on tensors - major_offsets -= major_offsets[0].clone() - label_hop_offsets -= label_hop_offsets[0].clone() - renumber_map_offsets -= renumber_map_offsets[0].clone() - - # get the sizes of each adjacency matrix (for MFGs) - mfg_sizes = (label_hop_offsets[1:] - label_hop_offsets[:-1]).reshape( - (n_batches, n_hops) - ) - n_nodes = renumber_map_offsets[1:] - renumber_map_offsets[:-1] - mfg_sizes = torch.hstack((mfg_sizes, n_nodes.reshape(n_batches, -1))) - if reverse_hop_id: - mfg_sizes = mfg_sizes.flip(1) - - tensors_dict = {} - renumber_map_list = [] - # Note: minors and major_offsets from BulkSampler are of type int32 - # and int64 respectively. Since pylibcugraphops binding code doesn't - # support distinct node and edge index type, we simply casting both - # to int32 for now. - minors = minors.int() - major_offsets = major_offsets.int() - # Note: We transfer tensors to CPU here to avoid the overhead of - # transferring them in each iteration of the for loop below. 
- major_offsets_cpu = major_offsets.to("cpu").numpy() - label_hop_offsets_cpu = label_hop_offsets.to("cpu").numpy() - - for batch_id in range(n_batches): - batch_dict = {} - for hop_id in range(n_hops): - hop_dict = {} - idx = batch_id * n_hops + hop_id # idx in label_hop_offsets - major_offsets_start = label_hop_offsets_cpu[idx] - major_offsets_end = label_hop_offsets_cpu[idx + 1] - minors_start = major_offsets_cpu[major_offsets_start] - minors_end = major_offsets_cpu[major_offsets_end] - hop_dict["minors"] = minors[minors_start:minors_end] - hop_dict["major_offsets"] = ( - major_offsets[major_offsets_start : major_offsets_end + 1] - - major_offsets[major_offsets_start] - ) - if reverse_hop_id: - batch_dict[n_hops - 1 - hop_id] = hop_dict - else: - batch_dict[hop_id] = hop_dict - - tensors_dict[batch_id] = batch_dict - - renumber_map_list.append( - renumber_map[ - renumber_map_offsets[batch_id] : renumber_map_offsets[batch_id + 1] - ], - ) - - return tensors_dict, renumber_map_list, mfg_sizes.tolist() - - -def _process_sampled_df_csc( - df: cudf.DataFrame, - reverse_hop_id: bool = True, -): - """ - Convert a dataframe generated by BulkSampler to a dictionary of tensors, to - facilitate MFG creation. The sampled graphs in the dataframe use CSC-format. - - Parameters - ---------- - df: cudf.DataFrame - The output from BulkSampler compressed in CSC format. The dataframe - should be generated with `compression="CSR"` in BulkSampler, - since the sampling routine treats seed nodes as sources. - - reverse_hop_id: bool (default=True) - Reverse hop id. - - Returns - ------- - tensors_dict: dict - A nested dictionary keyed by batch id and hop id. - `tensor_dict[batch_id][hop_id]` holds "minors" and "major_offsets" - values for CSC MFGs. - - renumber_map_list: list - List of renumbering maps for looking up global indices of nodes. One - map for each batch. - - mfg_sizes: list - List of the number of nodes in each message passing layer. 
For the - k-th hop, mfg_sizes[k] and mfg_sizes[k+1] is the number of sources and - destinations, respectively. - """ - - return _process_sampled_tensors_csc( - { - "major_offsets": cast_to_tensor(df.major_offsets.dropna()), - "label_hop_offsets": cast_to_tensor(df.label_hop_offsets.dropna()), - "renumber_map_offsets": cast_to_tensor(df.renumber_map_offsets.dropna()), - "map": cast_to_tensor(df["map"].dropna()), - "minors": cast_to_tensor(df.minors.dropna()), - }, - reverse_hop_id=reverse_hop_id, - ) - - -def _create_homogeneous_blocks_from_csc( - tensors_dict: Dict[int, Dict[int, Dict[str, torch.Tensor]]], - renumber_map_list: List[torch.Tensor], - mfg_sizes: List[int, int], -): - """Create mini-batches of MFGs in the dgl.Block format. - The input arguments are the outputs of - the function `_process_sampled_df_csc`. - - Returns - ------- - output: list - A list of mini-batches. Each mini-batch is a list that consists of - `input_nodes` tensor, `output_nodes` tensor and a list of MFGs. - """ - n_batches, n_hops = len(mfg_sizes), len(mfg_sizes[0]) - 1 - output = [] - for b_id in range(n_batches): - output_batch = [] - output_batch.append(renumber_map_list[b_id]) - output_batch.append(renumber_map_list[b_id][: mfg_sizes[b_id][-1]]) - - mfgs = [ - SparseGraph( - size=(mfg_sizes[b_id][h_id], mfg_sizes[b_id][h_id + 1]), - src_ids=tensors_dict[b_id][h_id]["minors"], - cdst_ids=tensors_dict[b_id][h_id]["major_offsets"], - formats=["csc", "coo"], - reduce_memory=True, - ) - for h_id in range(n_hops) - ] - - blocks = [] - seednodes_range = None - for mfg in reversed(mfgs): - block_mfg = _create_homogeneous_dgl_block_from_tensor_d( - { - "sources": mfg.src_ids(), - "destinations": mfg.dst_ids(), - "sources_range": mfg._num_src_nodes - 1, - "destinations_range": mfg._num_dst_nodes - 1, - }, - renumber_map=renumber_map_list[b_id], - seednodes_range=seednodes_range, - ) - - seednodes_range = max( - mfg._num_src_nodes - 1, - mfg._num_dst_nodes - 1, - ) - blocks.append(block_mfg) 
- del mfgs - - blocks.reverse() - - output_batch.append(blocks) - - output.append(output_batch) - return output - - -def _create_homogeneous_sparse_graphs_from_csc( - tensors_dict: Dict[int, Dict[int, Dict[str, torch.Tensor]]], - renumber_map_list: List[torch.Tensor], - mfg_sizes: List[int, int], -) -> List[List[torch.Tensor, torch.Tensor, List[SparseGraph]]]: - """Create mini-batches of MFGs. The input arguments are the outputs of - the function `_process_sampled_df_csc`. - - Returns - ------- - output: list - A list of mini-batches. Each mini-batch is a list that consists of - `input_nodes` tensor, `output_nodes` tensor and a list of MFGs. - """ - n_batches, n_hops = len(mfg_sizes), len(mfg_sizes[0]) - 1 - output = [] - for b_id in range(n_batches): - output_batch = [] - output_batch.append(renumber_map_list[b_id]) - output_batch.append(renumber_map_list[b_id][: mfg_sizes[b_id][-1]]) - mfgs = [ - SparseGraph( - size=(mfg_sizes[b_id][h_id], mfg_sizes[b_id][h_id + 1]), - src_ids=tensors_dict[b_id][h_id]["minors"], - cdst_ids=tensors_dict[b_id][h_id]["major_offsets"], - formats=["csc"], - reduce_memory=True, - ) - for h_id in range(n_hops) - ] - - output_batch.append(mfgs) - - output.append(output_batch) - - return output - - -def create_homogeneous_sampled_graphs_from_dataframe_csc( - sampled_df: cudf.DataFrame, output_format: str = "cugraph_dgl.nn.SparseGraph" -): - """Public API to create mini-batches of MFGs using a dataframe output by - BulkSampler, where the sampled graph is compressed in CSC format.""" - if output_format == "cugraph_dgl.nn.SparseGraph": - return _create_homogeneous_sparse_graphs_from_csc( - *(_process_sampled_df_csc(sampled_df)), - ) - elif output_format == "dgl.Block": - return _create_homogeneous_blocks_from_csc( - *(_process_sampled_df_csc(sampled_df)), - ) - else: - raise ValueError(f"Invalid output format {output_format}") - - -def create_homogeneous_sampled_graphs_from_tensors_csc( - tensors: Dict["torch.Tensor"], output_format: str = 
"cugraph_dgl.nn.SparseGraph" -): - """Public API to create mini-batches of MFGs using a dataframe output by - BulkSampler, where the sampled graph is compressed in CSC format.""" - if output_format == "cugraph_dgl.nn.SparseGraph": - return _create_homogeneous_sparse_graphs_from_csc( - *(_process_sampled_tensors_csc(tensors)), - ) - elif output_format == "dgl.Block": - return _create_homogeneous_blocks_from_csc( - *(_process_sampled_tensors_csc(tensors)), - ) - else: - raise ValueError(f"Invalid output format {output_format}") diff --git a/python/cugraph-dgl/cugraph_dgl/features.py b/python/cugraph-dgl/cugraph_dgl/features.py deleted file mode 100644 index 9dc009f4127..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/features.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -from cugraph.utilities.utils import import_optional, MissingModule - -torch = import_optional("torch") -dgl = import_optional("dgl") -wgth = import_optional("pylibwholegraph.torch") - - -class WholeFeatureStore( - object if isinstance(dgl, MissingModule) else dgl.storages.base.FeatureStorage -): - """ - Interface for feature storage. - """ - - def __init__( - self, - tensor: "torch.Tensor", - memory_type: str = "distributed", - location: str = "cpu", - ): - """ - Constructs a new WholeFeatureStore object that wraps a WholeGraph wholememory - distributed tensor. 
- - Parameters - ---------- - t: torch.Tensor - The local slice of the tensor being distributed. These should be in order - by rank (i.e. rank 0 contains elements 0-9, rank 1 contains elements 10-19, - rank 3 contains elements 20-29, etc.) The sizes do not need to be equal. - memory_type: str (optional, default='distributed') - The memory type of this store. Options are - 'distributed', 'chunked', and 'continuous'. - For more information consult the WholeGraph - documentation. - location: str(optional, default='cpu') - The location ('cpu' or 'cuda') where data is stored. - """ - self.__wg_comm = wgth.get_global_communicator() - - if len(tensor.shape) > 2: - raise ValueError("Only 1-D or 2-D tensors are supported by WholeGraph.") - - rank = torch.distributed.get_rank() - world_size = torch.distributed.get_world_size() - - ld = torch.tensor(tensor.shape[0], device="cuda", dtype=torch.int64) - sizes = torch.empty((world_size,), device="cuda", dtype=torch.int64) - torch.distributed.all_gather_into_tensor(sizes, ld) - - sizes = sizes.cpu() - ld = sizes.sum() - - self.__td = -1 if len(tensor.shape) == 1 else tensor.shape[1] - global_shape = [ - int(ld), - self.__td if self.__td > 0 else 1, - ] - - if self.__td < 0: - tensor = tensor.reshape((tensor.shape[0], 1)) - - wg_tensor = wgth.create_wholememory_tensor( - self.__wg_comm, - memory_type, - location, - global_shape, - tensor.dtype, - [global_shape[1], 1], - ) - - offset = sizes[:rank].sum() if rank > 0 else 0 - - wg_tensor.scatter( - tensor.clone(memory_format=torch.contiguous_format).cuda(), - torch.arange( - offset, offset + tensor.shape[0], dtype=torch.int64, device="cuda" - ).contiguous(), - ) - - self.__wg_comm.barrier() - - self.__wg_tensor = wg_tensor - - def requires_ddp(self) -> bool: - return True - - def fetch( - self, - indices: torch.Tensor, - device: torch.cuda.Device, - pin_memory=False, - **kwargs, - ): - if pin_memory: - warnings.warn("pin_memory has no effect for WholeFeatureStorage.") - - t = 
self.__wg_tensor.gather( - indices.cuda(), - force_dtype=self.__wg_tensor.dtype, - ) - - if self.__td < 0: - t = t.reshape((t.shape[0],)) - - return t.to(torch.device(device)) diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py deleted file mode 100644 index 88b93656fa8..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/graph.py +++ /dev/null @@ -1,931 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -from typing import Union, Optional, Dict, Tuple, List - -from cugraph.utilities.utils import import_optional -from cugraph.gnn import cugraph_comms_get_raft_handle - -import cupy -import pylibcugraph - -from cugraph_dgl.typing import TensorType -from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor -from cugraph_dgl.features import WholeFeatureStore -from cugraph_dgl.view import ( - HeteroNodeView, - HeteroNodeDataView, - HeteroEdgeView, - HeteroEdgeDataView, - EmbeddingView, -) - - -# Have to use import_optional even though these are required -# dependencies in order to build properly. -dgl = import_optional("dgl") -torch = import_optional("torch") -tensordict = import_optional("tensordict") - -HOMOGENEOUS_NODE_TYPE = "n" -HOMOGENEOUS_EDGE_TYPE = (HOMOGENEOUS_NODE_TYPE, "e", HOMOGENEOUS_NODE_TYPE) - - -class Graph: - """ - cuGraph-backed duck-typed version of dgl.DGLGraph that distributes - the graph across workers. 
This object uses lazy graph creation. - Users can repeatedly call add_edges, and the tensors won't - be converted into a cuGraph graph until one is needed - (i.e. when creating a loader). Supports - single-node/single-GPU, single-node/multi-GPU, and - multi-node/multi-GPU graph storage. - - Each worker should have a slice of the graph locally, and - call put_edge_index with its slice. - """ - - def __init__( - self, - is_multi_gpu: bool = False, - ndata_storage="torch", - edata_storage="torch", - **kwargs, - ): - """ - Parameters - ---------- - is_multi_gpu: bool (optional, default=False) - Specifies whether this graph is distributed across GPUs. - ndata_storage: str (optional, default='torch') - Specifies where node data should be stored - (options are 'torch' and 'wholegraph'). - If using PyTorch tensors for storage ('torch') - then data will be replicated across workers and data - for all nodes should be provided when calling add_nodes. - If using WholeGraph wholememory tensors for storage, - then data will be distributed across workers and only - the local slice of the data should be provided when - calling add_nodes. - edata_storage: str (optional, default='torch') - If using PyTorch tensors for storage ('torch') - then data will be replicated across workers and data - for all nodes should be provided when calling add_edge. - If using WholeGraph wholememory tensors for storage, - then data will be distributed across workers and only - the local slice of the data should be provided when - calling add_edges. - kwargs: - Optional kwargs for WholeGraph feature storage. 
- """ - - if ndata_storage not in ("torch", "wholegraph"): - raise ValueError( - "Invalid node storage type (valid types are 'torch' and 'wholegraph')" - ) - if edata_storage not in ("torch", "wholegraph"): - raise ValueError( - "Invalid edge storage type (valid types are 'torch' and 'wholegraph')" - ) - - self.__num_nodes_dict = {} - self.__num_edges_dict = {} - self.__edge_indices = tensordict.TensorDict({}, batch_size=(2,)) - - self.__graph = None - self.__vertex_offsets = None - self.__handle = None - self.__is_multi_gpu = is_multi_gpu - - self.__ndata_storage_type = ( - WholeFeatureStore - if ndata_storage == "wholegraph" - else dgl.storages.pytorch_tensor.PyTorchTensorStorage - ) - self.__edata_storage_type = ( - WholeFeatureStore - if edata_storage == "wholegraph" - else dgl.storages.pytorch_tensor.PyTorchTensorStorage - ) - self.__ndata_storage = {} - self.__edata_storage = {} - self.__wg_kwargs = kwargs - - @property - def is_multi_gpu(self): - return self.__is_multi_gpu - - def to_canonical_etype( - self, etype: Union[str, Tuple[str, str, str]] - ) -> Tuple[str, str, str]: - if etype is None: - if len(self.canonical_etypes) > 1: - raise ValueError("Edge type is required for heterogeneous graphs.") - return HOMOGENEOUS_EDGE_TYPE - - if isinstance(etype, tuple) and len(etype) == 3: - return etype - - for src_type, rel_type, dst_type in self.__edge_indices.keys( - leaves_only=True, include_nested=True - ): - if etype == rel_type: - return (src_type, rel_type, dst_type) - - raise ValueError("Unknown relation type " + etype) - - def add_nodes( - self, - global_num_nodes: int, - data: Optional[Dict[str, TensorType]] = None, - ntype: Optional[str] = None, - ): - """ - Adds the given number of nodes to this graph. Can only be called once - per node type. The number of nodes specified here refers to the total - number of nodes across all workers (the entire graph). If the backing - feature store is distributed (i.e. 
wholegraph), then only local features - should be passed to the data argument. If the backing feature store is - replicated, then features for all nodes in the graph should be passed to - the data argument, including those for nodes not on the local worker. - - Parameters - ---------- - global_num_nodes: int - The total number of nodes of the given type in this graph. - The same number should be passed to every worker. - data: Dict[str, TensorType] (optional, default=None) - Node feature tensors. - ntype: str (optional, default=None) - The node type being modified. Required for heterogeneous graphs. - """ - if ntype is None: - if len(self.__num_nodes_dict.keys()) > 1: - raise ValueError("Node type is required for heterogeneous graphs.") - ntype = HOMOGENEOUS_NODE_TYPE - - if ntype in self.__num_nodes_dict: - raise ValueError( - "Calling add_nodes multiple types for the same " - "node type is not allowed in cuGraph-DGL" - ) - - if self.is_multi_gpu: - # Ensure all nodes got the same number of nodes passed - world_size = torch.distributed.get_world_size() - local_size = torch.tensor( - [global_num_nodes], device="cuda", dtype=torch.int64 - ) - ns = torch.empty((world_size,), device="cuda", dtype=torch.int64) - torch.distributed.all_gather_into_tensor(ns, local_size) - if not (ns == global_num_nodes).all(): - raise ValueError("The global number of nodes must match on all workers") - - # Ensure the sum of the feature shapes equals the global number of nodes. - if data is not None: - for feature_name, feature_tensor in data.items(): - features_size = torch.tensor( - [int(feature_tensor.shape[0])], device="cuda", dtype=torch.int64 - ) - torch.distributed.all_reduce( - features_size, op=torch.distributed.ReduceOp.SUM - ) - if features_size != global_num_nodes: - raise ValueError( - "The total length of the feature vector across workers must" - " match the global number of nodes but it does not " - f"match for {feature_name}." 
- ) - - self.__num_nodes_dict[ntype] = global_num_nodes - - if data is not None: - for feature_name, feature_tensor in data.items(): - self.__ndata_storage[ntype, feature_name] = self.__ndata_storage_type( - _cast_to_torch_tensor(feature_tensor), **self.__wg_kwargs - ) - - self.__graph = None - self.__vertex_offsets = None - - def __check_node_ids(self, ntype: str, ids: TensorType): - """ - Ensures all node ids in the provided id tensor are valid. - Raises a ValueError if any are invalid. - - Parameters - ---------- - ntype: str - The node type being validated against. - ids: - The tensor of ids being validated. - """ - if ntype in self.__num_nodes_dict: - if ids.max() + 1 > self.num_nodes(ntype): - raise ValueError( - f"input tensor contains invalid node ids for type {ntype}" - ) - else: - raise ValueError( - f"add_nodes() must be called for type {ntype} before calling num_edges." - ) - - def add_edges( - self, - u: TensorType, - v: TensorType, - data: Optional[Dict[str, TensorType]] = None, - etype: Optional[Union[str, Tuple[str, str, str]]] = None, - ) -> None: - """ - Adds edges to this graph. Must be called after add_nodes - is called for the src/dst node type. If the backing feature - store is distributed (i.e. wholegraph), then only local - features should be passed to the data argument. If the - backing feature store is replicated, then features for - all edges should be passed to the data argument, - including those for edges not on the local worker. - - Parameters - ---------- - u: TensorType - 1d tensor of source node ids (local slice of the distributed edgelist). - v: TensorType - 1d tensor of destination node ids (local slice of the distributed edgelist). - data: Dict[str, TensorType] (optional, default=None) - Dictionary containing edge features for the new edges. - etype: Union[str, Tuple[str, str, str]] - The edge type of the edges being inserted. Not required - for homogeneous graphs, which have only one edge type. 
- """ - - # Validate all inputs before proceeding - # The number of nodes for the src/dst type needs to be known and there cannot - # be any edges of this type in the graph. - dgl_can_edge_type = self.to_canonical_etype(etype) - src_type, _, dst_type = dgl_can_edge_type - if dgl_can_edge_type in self.__edge_indices.keys( - leaves_only=True, include_nested=True - ): - raise ValueError( - "This cuGraph-DGL graph already contains edges of type" - f" {dgl_can_edge_type}. Calling add_edges multiple times" - " for the same edge type is not supported." - ) - self.__check_node_ids(src_type, u) - self.__check_node_ids(dst_type, v) - - self.__edge_indices[dgl_can_edge_type] = torch.stack( - [ - _cast_to_torch_tensor(u), - _cast_to_torch_tensor(v), - ] - ).to(self.idtype) - - if data is not None: - for attr_name, attr_tensor in data.items(): - self.__edata_storage[ - dgl_can_edge_type, attr_name - ] = self.__edata_storage_type( - _cast_to_torch_tensor(attr_tensor), **self.__wg_kwargs - ) - - num_edges = self.__edge_indices[dgl_can_edge_type].shape[1] - if self.is_multi_gpu: - num_edges = torch.tensor([num_edges], device="cuda", dtype=torch.int64) - torch.distributed.all_reduce(num_edges, op=torch.distributed.ReduceOp.SUM) - - self.__num_edges_dict[dgl_can_edge_type] = int(num_edges) - - self.__graph = None - self.__vertex_offsets = None - - def num_nodes(self, ntype: Optional[str] = None) -> int: - """ - Returns the number of nodes of ntype, or if ntype is not provided, - the total number of nodes in the graph. - """ - if ntype is None: - return sum(self.__num_nodes_dict.values()) - - return self.__num_nodes_dict[ntype] - - def number_of_nodes(self, ntype: Optional[str] = None) -> int: - """ - Alias for num_nodes. - """ - return self.num_nodes(ntype=ntype) - - def num_edges(self, etype: Union[str, Tuple[str, str, str]] = None) -> int: - """ - Returns the number of edges of etype, or if etype is not provided, - the total number of edges in the graph. 
- """ - if etype is None: - return sum(self.__num_edges_dict.values()) - - etype = self.to_canonical_etype(etype) - return self.__num_edges_dict[etype] - - def number_of_edges(self, etype: Union[str, Tuple[str, str, str]] = None) -> int: - """ - Alias for num_edges. - """ - return self.num_edges(etype=etype) - - @property - def ntypes(self) -> List[str]: - """ - Returns the node type names in this graph. - """ - return list(self.__num_nodes_dict.keys()) - - @property - def etypes(self) -> List[str]: - """ - Returns the edge type names in this graph - (the second element of the canonical edge - type tuple). - """ - return [et[1] for et in self.__num_edges_dict.keys()] - - @property - def canonical_etypes(self) -> List[str]: - """ - Returns the canonical edge type names in this - graph. - """ - return list(self.__num_edges_dict.keys()) - - @property - def _vertex_offsets(self) -> Dict[str, int]: - if self.__vertex_offsets is None: - ordered_keys = sorted(list(self.ntypes)) - self.__vertex_offsets = {} - offset = 0 - for vtype in ordered_keys: - self.__vertex_offsets[vtype] = offset - offset += self.num_nodes(vtype) - - return dict(self.__vertex_offsets) - - def __get_edgelist(self, prob_attr=None) -> Dict[str, "torch.Tensor"]: - """ - This function always returns src/dst labels with respect - to the out direction. - - Returns - ------- - Dict[str, torch.Tensor] with the following keys: - src: source vertices (int64) - Note that src is the 1st element of the DGL edge index. - dst: destination vertices (int64) - Note that dst is the 2nd element of the DGL edge index. - eid: edge ids for each edge (int64) - Note that these start from 0 for each edge type. - etp: edge types for each edge (int32) - Note that these are in lexicographic order. - """ - sorted_keys = sorted( - list(self.__edge_indices.keys(leaves_only=True, include_nested=True)) - ) - - # note that this still follows the DGL convention of (src, rel, dst) - # i.e. 
(author, writes, paper): [[0,1,2],[2,0,1]] is referring to a - # cuGraph graph where (paper 2) -> (author 0), (paper 0) -> (author 1), - # and (paper 1) -> (author 0) - edge_index = torch.concat( - [ - torch.stack( - [ - self.__edge_indices[src_type, rel_type, dst_type][0] - + self._vertex_offsets[src_type], - self.__edge_indices[src_type, rel_type, dst_type][1] - + self._vertex_offsets[dst_type], - ] - ) - for (src_type, rel_type, dst_type) in sorted_keys - ], - axis=1, - ).cuda() - - edge_type_array = torch.arange( - len(sorted_keys), dtype=torch.int32, device="cuda" - ).repeat_interleave( - torch.tensor( - [self.__edge_indices[et].shape[1] for et in sorted_keys], - device="cuda", - dtype=torch.int32, - ) - ) - - num_edges_t = torch.tensor( - [self.__edge_indices[et].shape[1] for et in sorted_keys], device="cuda" - ) - - if self.is_multi_gpu: - rank = torch.distributed.get_rank() - world_size = torch.distributed.get_world_size() - - num_edges_all_t = torch.empty( - world_size, num_edges_t.numel(), dtype=torch.int64, device="cuda" - ) - torch.distributed.all_gather_into_tensor(num_edges_all_t, num_edges_t) - - start_offsets = num_edges_all_t[:rank].T.sum(axis=1) - - else: - rank = 0 - start_offsets = torch.zeros( - (len(sorted_keys),), dtype=torch.int64, device="cuda" - ) - num_edges_all_t = num_edges_t.reshape((1, num_edges_t.numel())) - - # Use pinned memory here for fast access to CPU/WG storage - edge_id_array_per_type = [ - torch.arange( - start_offsets[i], - start_offsets[i] + num_edges_all_t[rank][i], - dtype=torch.int64, - device="cpu", - ).pin_memory() - for i in range(len(sorted_keys)) - ] - - # Retrieve the weights from the appropriate feature(s) - # DGL implicitly requires all edge types use the same - # feature name. 
- if prob_attr is None: - weights = None - else: - if len(sorted_keys) > 1: - weights = torch.concat( - [ - self.edata[prob_attr][sorted_keys[i]][ix] - for i, ix in enumerate(edge_id_array_per_type) - ] - ) - else: - weights = self.edata[prob_attr][edge_id_array_per_type[0]] - - # Safe to move this to cuda because the consumer will always - # move it to cuda if it isn't already there. - edge_id_array = torch.concat(edge_id_array_per_type).cuda() - - edgelist_dict = { - "src": edge_index[0], - "dst": edge_index[1], - "etp": edge_type_array, - "eid": edge_id_array, - } - - if weights is not None: - edgelist_dict["wgt"] = weights - - return edgelist_dict - - @property - def is_homogeneous(self): - return len(self.__num_edges_dict) <= 1 and len(self.__num_nodes_dict) <= 1 - - @property - def idtype(self): - return torch.int64 - - @property - def _resource_handle(self): - if self.__handle is None: - if self.is_multi_gpu: - self.__handle = pylibcugraph.ResourceHandle( - cugraph_comms_get_raft_handle().getHandle() - ) - else: - self.__handle = pylibcugraph.ResourceHandle() - return self.__handle - - def _graph( - self, - direction: str, - prob_attr: Optional[str] = None, - ) -> Union[pylibcugraph.SGGraph, pylibcugraph.MGGraph]: - """ - Gets the pylibcugraph Graph object with edges pointing in the given direction - (i.e. 'out' is standard, 'in' is reverse). 
- """ - - if direction not in ["out", "in"]: - raise ValueError(f"Invalid direction {direction} (expected 'in' or 'out').") - - graph_properties = pylibcugraph.GraphProperties( - is_multigraph=True, is_symmetric=False - ) - - if self.__graph is not None: - if ( - self.__graph["direction"] != direction - or self.__graph["prob_attr"] != prob_attr - ): - self.__graph = None - - if self.__graph is None: - src_col, dst_col = ("src", "dst") if direction == "out" else ("dst", "src") - edgelist_dict = self.__get_edgelist(prob_attr=prob_attr) - - if self.is_multi_gpu: - rank = torch.distributed.get_rank() - world_size = torch.distributed.get_world_size() - - vertices_array = cupy.arange(self.num_nodes(), dtype="int64") - vertices_array = cupy.array_split(vertices_array, world_size)[rank] - - graph = pylibcugraph.MGGraph( - self._resource_handle, - graph_properties, - [cupy.asarray(edgelist_dict[src_col]).astype("int64")], - [cupy.asarray(edgelist_dict[dst_col]).astype("int64")], - vertices_array=[vertices_array], - edge_id_array=[cupy.asarray(edgelist_dict["eid"])], - edge_type_array=[cupy.asarray(edgelist_dict["etp"])], - weight_array=[cupy.asarray(edgelist_dict["wgt"])] - if "wgt" in edgelist_dict - else None, - ) - else: - graph = pylibcugraph.SGGraph( - self._resource_handle, - graph_properties, - cupy.asarray(edgelist_dict[src_col]).astype("int64"), - cupy.asarray(edgelist_dict[dst_col]).astype("int64"), - vertices_array=cupy.arange(self.num_nodes(), dtype="int64"), - edge_id_array=cupy.asarray(edgelist_dict["eid"]), - edge_type_array=cupy.asarray(edgelist_dict["etp"]), - weight_array=cupy.asarray(edgelist_dict["wgt"]) - if "wgt" in edgelist_dict - else None, - ) - - self.__graph = {"graph": graph, "direction": direction, "prob_attr": prob_attr} - - return self.__graph["graph"] - - def _has_n_emb(self, ntype: str, emb_name: str) -> bool: - return (ntype, emb_name) in self.__ndata_storage - - def _get_n_emb( - self, ntype: Union[str, None], emb_name: str, u: Union[str, 
TensorType] - ) -> Union["torch.Tensor", "EmbeddingView"]: - """ - Gets the embedding of a single node type. - Unlike DGL, this function takes the string node - type name instead of an integer id. - - Parameters - ---------- - ntype: str - The node type to get the embedding of. - emb_name: str - The embedding name of the embedding to get. - u: Union[str, TensorType] - Nodes to get the representation of, or ALL - to get the representation of all nodes of - the given type (returns embedding view). - - Returns - ------- - Union[torch.Tensor, cugraph_dgl.view.EmbeddingView] - The embedding of the given edge type with the given embedding name. - """ - - if ntype is None: - if len(self.ntypes) == 1: - ntype = HOMOGENEOUS_NODE_TYPE - else: - raise ValueError("Must provide the node type for a heterogeneous graph") - - if dgl.base.is_all(u): - return EmbeddingView( - self.__ndata_storage[ntype, emb_name], self.num_nodes(ntype) - ) - - try: - return self.__ndata_storage[ntype, emb_name].fetch( - _cast_to_torch_tensor(u), "cuda" - ) - except RuntimeError as ex: - warnings.warn( - "Got error accessing data, trying again with index on device: " - + str(ex) - ) - return self.__ndata_storage[ntype, emb_name].fetch( - _cast_to_torch_tensor(u).cuda(), "cuda" - ) - - def _has_e_emb(self, etype: Tuple[str, str, str], emb_name: str) -> bool: - return (etype, emb_name) in self.__edata_storage - - def _get_e_emb( - self, etype: Tuple[str, str, str], emb_name: str, u: Union[str, TensorType] - ) -> "torch.Tensor": - """ - Gets the embedding of a single edge type. - Unlike DGL, this function takes the canonical edge type - instead of an integer id. - - Parameters - ---------- - etype: str - The edge type to get the embedding of. - emb_name: str - The embedding name of the embedding to get. - u: Union[str, TensorType] - Edges to get the representation of, or ALL to - get the representation of all nodes of the - given type. 
- - Returns - ------- - torch.Tensor - The embedding of the given edge type with the given embedding name. - """ - - etype = self.to_canonical_etype(etype) - - if dgl.base.is_all(u): - return EmbeddingView( - self.__edata_storage[etype, emb_name], self.num_edges(etype) - ) - - try: - return self.__edata_storage[etype, emb_name].fetch( - _cast_to_torch_tensor(u), "cuda" - ) - except RuntimeError as ex: - warnings.warn( - "Got error accessing data, trying again with index on device: " - + str(ex) - ) - return self.__edata_storage[etype, emb_name].fetch( - _cast_to_torch_tensor(u).cuda(), "cuda" - ) - - def _set_n_emb( - self, ntype: str, u: Union[str, TensorType], kv: Dict[str, TensorType] - ) -> None: - """ - Stores or updates the embedding(s) of a single node type. - Unlike DGL, this function takes the string node type name - instead of an integer id. - - The semantics of this function match those of add_nodes - with respect to whether or not the backing feature store - is distributed. - - Parameters - ---------- - ntype: str - The node type to store an embedding of. - u: Union[str, TensorType] - The indices to update, if updating the embedding. - Currently, updating a slice of an embedding is - unsupported, so this should be ALL. - kv: Dict[str, TensorType] - A mapping of embedding names to embedding tensors. - """ - - if not dgl.base.is_all(u): - raise NotImplementedError( - "Updating a slice of an embedding is " - "currently unimplemented in cuGraph-DGL." - ) - - for k, v in kv: - self.__ndata_storage[ntype, k] = self.__ndata_storage_type( - v, - **self.__wg_kwargs, - ) - - def _set_e_emb( - self, etype: str, u: Union[str, TensorType], kv: Dict[str, TensorType] - ) -> None: - """ - Stores or updates the embedding(s) of a single edge type. - Unlike DGL, this function takes the canonical edge type name - instead of an integer id. - - The semantics of this function match those of add_edges - with respect to whether or not the backing feature store - is distributed. 
- - Parameters - ---------- - etype: str - The edge type to store an embedding of. - u: Union[str, TensorType] - The indices to update, if updating the embedding. - Currently, updating a slice of an embedding is - unsupported, so this should be ALL. - kv: Dict[str, TensorType] - A mapping of embedding names to embedding tensors. - """ - - if not dgl.base.is_all(u): - raise NotImplementedError( - "Updating a slice of an embedding is " - "currently unimplemented in cuGraph-DGL." - ) - - for k, v in kv: - self.__edata_storage[etype, k] = self.__edata_storage_type( - v, - **self.__wg_kwargs, - ) - - def _pop_n_emb(self, ntype: str, key: str) -> "torch.Tensor": - """ - Removes and returns the embedding of the given node - type with the given name. - - Parameters - ---------- - ntype:str - The node type. - key:str - The embedding name. - - Returns - ------- - The removed embedding. - """ - return self.__ndata_storage[ntype, key].pop(key) - - def _pop_e_emb(self, etype: str, key: str) -> "torch.Tensor": - """ - Removes and returns the embedding of the given edge - type with the given name. - - Parameters - ---------- - etype:str - The node type. - key:str - The embedding name. - - Returns - ------- - torch.Tensor - The removed embedding. - """ - return self.__edata_storage[etype, key].pop(key) - - def _get_n_emb_keys(self, ntype: str) -> List[str]: - """ - Gets a list of the embedding names for a given node - type. - - Parameters - ---------- - ntype: str - The node type to get embedding names for. - - Returns - ------- - List[str] - The list of embedding names for the given node type. - """ - return [k for (t, k) in self.__ndata_storage if ntype == t] - - def _get_e_emb_keys(self, etype: str) -> List[str]: - """ - Gets a list of the embedding names for a given edge - type. - - Parameters - ---------- - etype: str - The edge type to get embedding names for. - - Returns - ------- - List[str] - The list of embedding names for the given edge type. 
- """ - return [k for (t, k) in self.__edata_storage if etype == t] - - def all_edges( - self, - form="uv", - order="eid", - etype: Union[str, Tuple[str, str, str]] = None, - device: Union[str, int, "torch.device"] = "cpu", - ): - """ - Returns all edges with the specified edge type. - cuGraph-DGL currently only supports 'eid' format and - 'eid' order. - - Parameters - ---------- - form: str (optional, default='uv') - The format to return ('uv', 'eid', 'all'). - - order: str (optional, default='eid') - The order to return edges in ('eid', 'srcdst') - cuGraph-DGL currently only supports 'eid'. - etype: Union[str, Tuple[str, str, str]] (optional, default=None) - The edge type to get. Not required if this is - a homogeneous graph. Can be the relation type if the - relation type is unique, or the canonical edge type. - device: Union[str, int, torch.device] (optional, default='cpu') - The device where returned edges should be stored - ('cpu', 'cuda', or device id). - """ - - if order != "eid": - raise NotImplementedError("cugraph-DGL only supports eid order.") - - if etype is None and len(self.canonical_etypes) > 1: - raise ValueError("Edge type is required for heterogeneous graphs.") - - etype = self.to_canonical_etype(etype) - - if form == "eid": - return torch.arange( - 0, - self.__num_edges_dict[etype], - dtype=self.idtype, - device=device, - ) - else: - if self.is_multi_gpu: - # This can't be done because it requires collective communication. - raise ValueError( - "Calling all_edges in a distributed graph with" - " form 'uv' or 'all' is unsupported." 
- ) - - else: - eix = self.__edge_indices[etype].to(device) - if form == "uv": - return eix[0], eix[1] - elif form == "all": - return ( - eix[0], - eix[1], - torch.arange( - self.__num_edges_dict[etype], - dtype=self.idtype, - device=device, - ), - ) - else: - raise ValueError(f"Invalid form {form}") - - @property - def ndata(self) -> HeteroNodeDataView: - """ - Returns a view of the node data in this graph which can be used to - access or modify node features. - """ - - if len(self.ntypes) == 1: - ntype = self.ntypes[0] - return HeteroNodeDataView(self, ntype, dgl.base.ALL) - - return HeteroNodeDataView(self, self.ntypes, dgl.base.ALL) - - @property - def edata(self) -> HeteroEdgeDataView: - """ - Returns a view of the edge data in this graph which can be used to - access or modify edge features. - """ - if len(self.canonical_etypes) == 1: - return HeteroEdgeDataView(self, None, dgl.base.ALL) - - return HeteroEdgeDataView(self, self.canonical_etypes, dgl.base.ALL) - - @property - def nodes(self) -> HeteroNodeView: - """ - Returns a view of the nodes in this graph. - """ - return HeteroNodeView(self) - - @property - def edges(self) -> HeteroEdgeView: - """ - Returns a view of the edges in this graph. - """ - return HeteroEdgeView(self) diff --git a/python/cugraph-dgl/cugraph_dgl/nn/__init__.py b/python/cugraph-dgl/cugraph_dgl/nn/__init__.py deleted file mode 100644 index 9a4a087baf4..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/nn/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -from .conv import * # noqa diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/__init__.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/__init__.py deleted file mode 100644 index 3e7f2f076f0..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .base import SparseGraph -from .gatconv import GATConv -from .gatv2conv import GATv2Conv -from .relgraphconv import RelGraphConv -from .sageconv import SAGEConv -from .transformerconv import TransformerConv - -__all__ = [ - "SparseGraph", - "GATConv", - "GATv2Conv", - "RelGraphConv", - "SAGEConv", - "TransformerConv", -] diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py deleted file mode 100644 index fcd5a26aee6..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py +++ /dev/null @@ -1,376 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional - -import cugraph_dgl - -torch = import_optional("torch") -ops_torch = import_optional("pylibcugraphops.pytorch") -dgl = import_optional("dgl") - - -def compress_ids(ids: torch.Tensor, size: int) -> torch.Tensor: - return torch._convert_indices_from_coo_to_csr( - ids, size, out_int32=ids.dtype == torch.int32 - ) - - -def decompress_ids(c_ids: torch.Tensor) -> torch.Tensor: - ids = torch.arange(c_ids.numel() - 1, dtype=c_ids.dtype, device=c_ids.device) - return ids.repeat_interleave(c_ids[1:] - c_ids[:-1]) - - -class SparseGraph(object): - r"""A class to create and store different sparse formats needed by - cugraph-ops. It always creates a CSC representation and can provide COO- or - CSR-format if needed. - - Parameters - ---------- - size: tuple of int - Size of the adjacency matrix: (num_src_nodes, num_dst_nodes). - - src_ids: torch.Tensor - Source indices of the edges. - - dst_ids: torch.Tensor, optional - Destination indices of the edges. - - csrc_ids: torch.Tensor, optional - Compressed source indices. It is a monotonically increasing array of - size (num_src_nodes + 1,). For the k-th source node, its neighborhood - consists of the destinations between `dst_indices[csrc_indices[k]]` and - `dst_indices[csrc_indices[k+1]]`. - - cdst_ids: torch.Tensor, optional - Compressed destination indices. It is a monotonically increasing array of - size (num_dst_nodes + 1,). 
For the k-th destination node, its neighborhood - consists of the sources between `src_indices[cdst_indices[k]]` and - `src_indices[cdst_indices[k+1]]`. - - values: torch.Tensor, optional - Values on the edges. - - is_sorted: bool - Whether the COO inputs (src_ids, dst_ids, values) have been sorted by - `dst_ids` in an ascending order. CSC layout creation is much faster - when sorted. - - formats: str or tuple of str, optional - The desired sparse formats to create for the graph. The formats tuple - must include "csc". Default: "csc". - - reduce_memory: bool, optional - When set, the tensors are not required by the desired formats will be - set to `None`. Default: True. - - Notes - ----- - For MFGs (sampled graphs), the node ids must have been renumbered. - """ - - supported_formats = { - "coo": ("_src_ids", "_dst_ids"), - "csc": ("_cdst_ids", "_src_ids"), - "csr": ("_csrc_ids", "_dst_ids", "_perm_csc2csr"), - } - - all_tensors = set( - [ - "_src_ids", - "_dst_ids", - "_csrc_ids", - "_cdst_ids", - "_perm_coo2csc", - "_perm_csc2csr", - ] - ) - - def __init__( - self, - size: Tuple[int, int], - src_ids: torch.Tensor, - dst_ids: Optional[torch.Tensor] = None, - csrc_ids: Optional[torch.Tensor] = None, - cdst_ids: Optional[torch.Tensor] = None, - values: Optional[torch.Tensor] = None, - is_sorted: bool = False, - formats: Union[str, Tuple[str]] = "csc", - reduce_memory: bool = True, - ): - self._num_src_nodes, self._num_dst_nodes = size - self._is_sorted = is_sorted - - if dst_ids is None and cdst_ids is None: - raise ValueError( - "One of 'dst_ids' and 'cdst_ids' must be given " - "to create a SparseGraph." 
- ) - - if src_ids is not None: - src_ids = src_ids.contiguous() - - if dst_ids is not None: - dst_ids = dst_ids.contiguous() - - if csrc_ids is not None: - if csrc_ids.numel() != self._num_src_nodes + 1: - raise RuntimeError( - f"Size mismatch for 'csrc_ids': expected ({size[0] + 1},), " - f"but got {tuple(csrc_ids.size())}" - ) - csrc_ids = csrc_ids.contiguous() - - if cdst_ids is not None: - if cdst_ids.numel() != self._num_dst_nodes + 1: - raise RuntimeError( - f"Size mismatch for 'cdst_ids': expected ({size[1] + 1},), " - f"but got {tuple(cdst_ids.size())}" - ) - cdst_ids = cdst_ids.contiguous() - - if values is not None: - values = values.contiguous() - - self._src_ids = src_ids - self._dst_ids = dst_ids - self._csrc_ids = csrc_ids - self._cdst_ids = cdst_ids - self._values = values - self._perm_coo2csc = None - self._perm_csc2csr = None - - if isinstance(formats, str): - formats = (formats,) - self._formats = formats - - if "csc" not in formats: - raise ValueError( - f"{self.__class__.__name__}.formats must contain " - f"'csc', but got {formats}." 
- ) - - # always create csc first - if self._cdst_ids is None: - if not self._is_sorted: - self._dst_ids, self._perm_coo2csc = torch.sort(self._dst_ids) - self._src_ids = self._src_ids[self._perm_coo2csc] - if self._values is not None: - self._values = self._values[self._perm_coo2csc] - self._cdst_ids = compress_ids(self._dst_ids, self._num_dst_nodes) - - for format_ in formats: - assert format_ in SparseGraph.supported_formats - self.__getattribute__(f"{format_}")() - - self._reduce_memory = reduce_memory - if reduce_memory: - self.reduce_memory() - - def reduce_memory(self): - """Remove the tensors that are not necessary to create the desired sparse - formats to reduce memory footprint.""" - if self._formats is None: - return - - tensors_needed = [] - for f in self._formats: - tensors_needed += SparseGraph.supported_formats[f] - for t in SparseGraph.all_tensors.difference(set(tensors_needed)): - self.__dict__[t] = None - - def src_ids(self) -> torch.Tensor: - return self._src_ids - - def cdst_ids(self) -> torch.Tensor: - return self._cdst_ids - - def dst_ids(self) -> torch.Tensor: - if self._dst_ids is None: - self._dst_ids = decompress_ids(self._cdst_ids) - return self._dst_ids - - def csrc_ids(self) -> torch.Tensor: - if self._csrc_ids is None: - src_ids, self._perm_csc2csr = torch.sort(self._src_ids) - self._csrc_ids = compress_ids(src_ids, self._num_src_nodes) - return self._csrc_ids - - def num_src_nodes(self): - return self._num_src_nodes - - def num_dst_nodes(self): - return self._num_dst_nodes - - def values(self): - return self._values - - def formats(self): - return self._formats - - def coo(self) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: - if "coo" not in self.formats(): - raise RuntimeError( - "The SparseGraph did not create a COO layout. " - "Set 'formats' list to include 'coo' when creating the graph." 
- ) - return self.src_ids(), self.dst_ids(), self._values - - def csc(self) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: - if "csc" not in self.formats(): - raise RuntimeError( - "The SparseGraph did not create a CSC layout. " - "Set 'formats' list to include 'csc' when creating the graph." - ) - return self.cdst_ids(), self.src_ids(), self._values - - def csr(self) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: - if "csr" not in self.formats(): - raise RuntimeError( - "The SparseGraph did not create a CSR layout. " - "Set 'formats' list to include 'csr' when creating the graph." - ) - csrc_ids = self.csrc_ids() - dst_ids = self.dst_ids()[self._perm_csc2csr] - value = self._values - if value is not None: - value = value[self._perm_csc2csr] - return csrc_ids, dst_ids, value - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}(num_src_nodes={self._num_src_nodes}, " - f"num_dst_nodes={self._num_dst_nodes}, " - f"num_edges={self._src_ids.size(0)}, formats={self._formats})" - ) - - def to(self, device: Union[torch.device, str, int]) -> "cugraph_dgl.nn.SparseGraph": - sg = SparseGraph( - src_ids=None if self._src_ids is None else self._src_ids.to(device), - dst_ids=None if self._dst_ids is None else self._dst_ids.to(device), - csrc_ids=None if self._csrc_ids is None else self._csrc_ids.to(device), - cdst_ids=None if self._cdst_ids is None else self._cdst_ids.to(device), - values=None if self._values is None else self._values.to(device), - is_sorted=self._is_sorted, - formats=self._formats, - reduce_memory=self._reduce_memory, - ) - - sg._perm_coo2csc = ( - None if self._perm_coo2csc is None else self._perm_coo2csc.to(device) - ) - sg._perm_csc2csr = ( - None if self._perm_csc2csr is None else self._perm_csc2csr.to(device) - ) - - return sg - - -class BaseConv(torch.nn.Module): - r"""An abstract base class for cugraph-ops nn module.""" - - def __init__(self): - super().__init__() - - def reset_parameters(self): - 
r"""Resets all learnable parameters of the module.""" - raise NotImplementedError - - def forward(self, *args): - r"""Runs the forward pass of the module.""" - raise NotImplementedError - - def get_cugraph_ops_CSC( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - is_bipartite: bool = False, - max_in_degree: Optional[int] = None, - ) -> ops_torch.CSC: - """Create CSC structure needed by cugraph-ops.""" - - if not isinstance(g, (SparseGraph, dgl.DGLHeteroGraph)): - raise TypeError( - f"The graph has to be either a 'cugraph_dgl.nn.SparseGraph' or " - f"'dgl.DGLHeteroGraph', but got '{type(g)}'." - ) - - # TODO: max_in_degree should default to None in pylibcugraphops - if max_in_degree is None: - max_in_degree = -1 - - if isinstance(g, SparseGraph): - offsets, indices, _ = g.csc() - else: - offsets, indices, _ = g.adj_tensors("csc") - - graph = ops_torch.CSC( - offsets=offsets, - indices=indices, - num_src_nodes=g.num_src_nodes(), - dst_max_in_degree=max_in_degree, - is_bipartite=is_bipartite, - ) - - return graph - - def get_cugraph_ops_HeteroCSC( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - num_edge_types: int, - etypes: Optional[torch.Tensor] = None, - is_bipartite: bool = False, - max_in_degree: Optional[int] = None, - ) -> ops_torch.HeteroCSC: - """Create HeteroCSC structure needed by cugraph-ops.""" - - if not isinstance(g, (SparseGraph, dgl.DGLHeteroGraph)): - raise TypeError( - f"The graph has to be either a 'cugraph_dgl.nn.SparseGraph' or " - f"'dgl.DGLHeteroGraph', but got '{type(g)}'." - ) - - # TODO: max_in_degree should default to None in pylibcugraphops - if max_in_degree is None: - max_in_degree = -1 - - if isinstance(g, SparseGraph): - offsets, indices, etypes = g.csc() - if etypes is None: - raise ValueError( - "SparseGraph must have 'values' to create HeteroCSC. " - "Pass in edge types as 'values' when creating the SparseGraph." 
- ) - etypes = etypes.int() - else: - if etypes is None: - raise ValueError( - "'etypes' is required when creating HeteroCSC " - "from dgl.DGLHeteroGraph." - ) - offsets, indices, perm = g.adj_tensors("csc") - etypes = etypes[perm].int() - - graph = ops_torch.HeteroCSC( - offsets=offsets, - indices=indices, - edge_types=etypes, - num_src_nodes=g.num_src_nodes(), - num_edge_types=num_edge_types, - dst_max_in_degree=max_in_degree, - is_bipartite=is_bipartite, - ) - - return graph diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py deleted file mode 100644 index e8813271fd8..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py +++ /dev/null @@ -1,314 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Union - -from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph -from cugraph.utilities.utils import import_optional - -dgl = import_optional("dgl") -torch = import_optional("torch") -nn = import_optional("torch.nn") -ops_torch = import_optional("pylibcugraphops.pytorch") - - -class GATConv(BaseConv): - r"""Graph attention layer from `Graph Attention Network - `__, with the sparse aggregation - accelerated by cugraph-ops. - - Parameters - ---------- - in_feats : int or (int, int) - Input feature size. A pair denotes feature sizes of source and - destination nodes. - out_feats : int - Output feature size. 
- num_heads : int - Number of heads in multi-head attention. - feat_drop : float, optional - Dropout rate on feature. Defaults: ``0``. - concat : bool, optional - If False, the multi-head attentions are averaged instead of concatenated. - Default: ``True``. - edge_feats : int, optional - Edge feature size. Default: ``None``. - negative_slope : float, optional - LeakyReLU angle of negative slope. Defaults: ``0.2``. - residual : bool, optional - If True, use residual connection. Defaults: ``False``. - allow_zero_in_degree : bool, optional - If there are 0-in-degree nodes in the graph, output for those nodes will - be invalid since no message will be passed to those nodes. This is - harmful for some applications causing silent performance regression. - This module will raise a DGLError if it detects 0-in-degree nodes in - input graph. By setting ``True``, it will suppress the check and let the - users handle it by themselves. Defaults: ``False``. - bias : bool, optional - If True, learns a bias term. Defaults: ``True``. - - Examples - -------- - >>> import dgl - >>> import torch - >>> from cugraph_dgl.nn import GATConv - ... 
- >>> device = 'cuda' - >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])).to(device) - >>> g = dgl.add_self_loop(g) - >>> feat = torch.ones(6, 10).to(device) - >>> conv = GATConv(10, 2, num_heads=3).to(device) - >>> res = conv(g, feat) - >>> res - tensor([[[ 0.2340, 1.9226], - [ 1.6477, -1.9986], - [ 1.1138, -1.9302]], - [[ 0.2340, 1.9226], - [ 1.6477, -1.9986], - [ 1.1138, -1.9302]], - [[ 0.2340, 1.9226], - [ 1.6477, -1.9986], - [ 1.1138, -1.9302]], - [[ 0.2340, 1.9226], - [ 1.6477, -1.9986], - [ 1.1138, -1.9302]], - [[ 0.2340, 1.9226], - [ 1.6477, -1.9986], - [ 1.1138, -1.9302]], - [[ 0.2340, 1.9226], - [ 1.6477, -1.9986], - [ 1.1138, -1.9302]]], device='cuda:0', grad_fn=) - """ - - def __init__( - self, - in_feats: Union[int, tuple[int, int]], - out_feats: int, - num_heads: int, - feat_drop: float = 0.0, - concat: bool = True, - edge_feats: Optional[int] = None, - negative_slope: float = 0.2, - residual: bool = False, - allow_zero_in_degree: bool = False, - bias: bool = True, - ): - super().__init__() - - if isinstance(in_feats, int): - self.in_feats_src = self.in_feats_dst = in_feats - else: - self.in_feats_src, self.in_feats_dst = in_feats - self.in_feats = in_feats - self.out_feats = out_feats - self.num_heads = num_heads - self.feat_drop = nn.Dropout(feat_drop) - self.concat = concat - self.edge_feats = edge_feats - self.negative_slope = negative_slope - self.residual = residual - self.allow_zero_in_degree = allow_zero_in_degree - - if isinstance(in_feats, int): - self.lin = nn.Linear(in_feats, num_heads * out_feats, bias=False) - else: - self.lin_src = nn.Linear( - self.in_feats_src, num_heads * out_feats, bias=False - ) - self.lin_dst = nn.Linear( - self.in_feats_dst, num_heads * out_feats, bias=False - ) - - if edge_feats is not None: - self.lin_edge = nn.Linear(edge_feats, num_heads * out_feats, bias=False) - self.attn_weights = nn.Parameter(torch.empty(3 * num_heads * out_feats)) - else: - self.register_parameter("lin_edge", None) - self.attn_weights = 
nn.Parameter(torch.empty(2 * num_heads * out_feats)) - - out_dim = num_heads * out_feats if concat else out_feats - if residual: - if self.in_feats_dst != out_dim: - self.lin_res = nn.Linear(self.in_feats_dst, out_dim, bias=bias) - else: - self.lin_res = nn.Identity() - else: - self.register_buffer("lin_res", None) - - if bias and not isinstance(self.lin_res, nn.Linear): - if concat: - self.bias = nn.Parameter(torch.empty(num_heads, out_feats)) - else: - self.bias = nn.Parameter(torch.empty(out_feats)) - else: - self.register_buffer("bias", None) - - self.reset_parameters() - - def set_allow_zero_in_degree(self, set_value): - r"""Set allow_zero_in_degree flag.""" - self.allow_zero_in_degree = set_value - - def reset_parameters(self): - r"""Reinitialize learnable parameters.""" - gain = nn.init.calculate_gain("relu") - if hasattr(self, "lin"): - nn.init.xavier_normal_(self.lin.weight, gain=gain) - else: - nn.init.xavier_normal_(self.lin_src.weight, gain=gain) - nn.init.xavier_normal_(self.lin_dst.weight, gain=gain) - - nn.init.xavier_normal_( - self.attn_weights.view(-1, self.num_heads, self.out_feats), gain=gain - ) - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - - if self.lin_res is not None: - self.lin_res.reset_parameters() - - if self.bias is not None: - nn.init.zeros_(self.bias) - - def forward( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - nfeat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], - efeat: Optional[torch.Tensor] = None, - max_in_degree: Optional[int] = None, - deterministic_dgrad: bool = False, - deterministic_wgrad: bool = False, - high_precision_dgrad: bool = False, - high_precision_wgrad: bool = False, - ) -> torch.Tensor: - r"""Forward computation. - - Parameters - ---------- - graph : DGLGraph or SparseGraph - The graph. - nfeat : torch.Tensor or (torch.Tensor, torch.Tensor) - Node features. 
If given as a tuple, the two elements correspond to - the source and destination node features, respectively, in a - bipartite graph. - efeat: torch.Tensor, optional - Optional edge features. - max_in_degree : int - Maximum in-degree of destination nodes. When :attr:`g` is generated - from a neighbor sampler, the value should be set to the corresponding - :attr:`fanout`. This option is used to invoke the MFG-variant of - cugraph-ops kernel. - deterministic_dgrad : bool, default=False - Optional flag indicating whether the feature gradients - are computed deterministically using a dedicated workspace buffer. - deterministic_wgrad: bool, default=False - Optional flag indicating whether the weight gradients - are computed deterministically using a dedicated workspace buffer. - high_precision_dgrad: bool, default=False - Optional flag indicating whether gradients for inputs in half precision - are kept in single precision as long as possible and only casted to - the corresponding input type at the very end. - high_precision_wgrad: bool, default=False - Optional flag indicating whether gradients for weights in half precision - are kept in single precision as long as possible and only casted to - the corresponding input type at the very end. - - Returns - ------- - torch.Tensor - The output feature of shape :math:`(N, H, D_{out})` where - :math:`H` is the number of heads, and :math:`D_{out}` is size of - output feature. - """ - if isinstance(g, dgl.DGLHeteroGraph): - if not self.allow_zero_in_degree: - if (g.in_degrees() == 0).any(): - raise dgl.base.DGLError( - "There are 0-in-degree nodes in the graph, " - "output for those nodes will be invalid. " - "This is harmful for some applications, " - "causing silent performance regression. " - "Adding self-loop on the input graph by " - "calling `g = dgl.add_self_loop(g)` will resolve " - "the issue. 
Setting ``allow_zero_in_degree`` " - "to be `True` when constructing this module will " - "suppress the check and let the code run." - ) - - bipartite = isinstance(nfeat, (list, tuple)) - - _graph = self.get_cugraph_ops_CSC( - g, is_bipartite=bipartite, max_in_degree=max_in_degree - ) - if deterministic_dgrad: - _graph.add_reverse_graph() - - if bipartite: - nfeat = (self.feat_drop(nfeat[0]), self.feat_drop(nfeat[1])) - nfeat_dst_orig = nfeat[1] - else: - nfeat = self.feat_drop(nfeat) - nfeat_dst_orig = nfeat[: g.num_dst_nodes()] - - if efeat is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_feats must be set to " - f"accept edge features." - ) - efeat = self.lin_edge(efeat) - - if bipartite: - if not hasattr(self, "lin_src"): - nfeat_src = self.lin(nfeat[0]) - nfeat_dst = self.lin(nfeat[1]) - else: - nfeat_src = self.lin_src(nfeat[0]) - nfeat_dst = self.lin_dst(nfeat[1]) - else: - if not hasattr(self, "lin"): - raise RuntimeError( - f"{self.__class__.__name__}.in_feats is expected to be an " - f"integer when the graph is not bipartite, " - f"but got {self.in_feats}." 
- ) - nfeat = self.lin(nfeat) - - out = ops_torch.operators.mha_gat_n2n( - (nfeat_src, nfeat_dst) if bipartite else nfeat, - self.attn_weights, - _graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=efeat, - deterministic_dgrad=deterministic_dgrad, - deterministic_wgrad=deterministic_wgrad, - high_precision_dgrad=high_precision_dgrad, - high_precision_wgrad=high_precision_wgrad, - )[: g.num_dst_nodes()] - - if self.concat: - out = out.view(-1, self.num_heads, self.out_feats) - - if self.residual: - res = self.lin_res(nfeat_dst_orig).view(-1, self.num_heads, self.out_feats) - if not self.concat: - res = res.mean(dim=1) - out = out + res - - if self.bias is not None: - out = out + self.bias - - return out diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatv2conv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatv2conv.py deleted file mode 100644 index 4f47005f8ee..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatv2conv.py +++ /dev/null @@ -1,254 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Optional, Union - -from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph -from cugraph.utilities.utils import import_optional - -dgl = import_optional("dgl") -torch = import_optional("torch") -nn = import_optional("torch.nn") -ops_torch = import_optional("pylibcugraphops.pytorch") - - -class GATv2Conv(BaseConv): - r"""GATv2 from `How Attentive are Graph Attention Networks? - `__, with the sparse aggregation - accelerated by cugraph-ops. - - Parameters - ---------- - in_feats : int or (int, int) - Input feature size. A pair denotes feature sizes of source and - destination nodes. - out_feats : int - Output feature size. - num_heads : int - Number of heads in Multi-Head Attention. - feat_drop : float, optional - Dropout rate on feature. Defaults: ``0``. - concat : bool, optional - If False, the multi-head attentions are averaged instead of concatenated. - Default: ``True``. - edge_feats : int, optional - Edge feature size. Default: ``None``. - negative_slope : float, optional - LeakyReLU angle of negative slope. Defaults: ``0.2``. - residual : bool, optional - If True, use residual connection. Defaults: ``False``. - allow_zero_in_degree : bool, optional - If there are 0-in-degree nodes in the graph, output for those nodes will - be invalid since no message will be passed to those nodes. This is - harmful for some applications causing silent performance regression. - This module will raise a DGLError if it detects 0-in-degree nodes in - input graph. By setting ``True``, it will suppress the check and let the - users handle it by themselves. Defaults: ``False``. - bias : bool, optional - If True, learns a bias term. Defaults: ``True``. - share_weights : bool, optional - If ``True``, the same matrix will be applied to the source and the - destination node features. Defaults: ``False``. 
- """ - - def __init__( - self, - in_feats: Union[int, tuple[int, int]], - out_feats: int, - num_heads: int, - feat_drop: float = 0.0, - concat: bool = True, - edge_feats: Optional[int] = None, - negative_slope: float = 0.2, - residual: bool = False, - allow_zero_in_degree: bool = False, - bias: bool = True, - share_weights: bool = False, - ): - super().__init__() - - if isinstance(in_feats, int): - self.in_feats_src = self.in_feats_dst = in_feats - else: - self.in_feats_src, self.in_feats_dst = in_feats - self.in_feats = in_feats - self.out_feats = out_feats - self.num_heads = num_heads - self.feat_drop = nn.Dropout(feat_drop) - self.concat = concat - self.edge_feats = edge_feats - self.negative_slope = negative_slope - self.residual = residual - self.allow_zero_in_degree = allow_zero_in_degree - self.share_weights = share_weights - self.bias = bias - - self.lin_src = nn.Linear(self.in_feats_src, num_heads * out_feats, bias=bias) - if share_weights: - if self.in_feats_src != self.in_feats_dst: - raise ValueError( - f"Input feature size of source and destination " - f"nodes must be identical when share_weights is enabled, " - f"but got {self.in_feats_src} and {self.in_feats_dst}." 
- ) - self.lin_dst = self.lin_src - else: - self.lin_dst = nn.Linear( - self.in_feats_dst, num_heads * out_feats, bias=bias - ) - - self.attn_weights = nn.Parameter(torch.empty(num_heads * out_feats)) - - if edge_feats is not None: - self.lin_edge = nn.Linear(edge_feats, num_heads * out_feats, bias=False) - else: - self.register_parameter("lin_edge", None) - - out_dim = num_heads * out_feats if concat else out_feats - if residual: - if self.in_feats_dst != out_dim: - self.lin_res = nn.Linear(self.in_feats_dst, out_dim, bias=bias) - else: - self.lin_res = nn.Identity() - else: - self.register_buffer("lin_res", None) - - self.reset_parameters() - - def set_allow_zero_in_degree(self, set_value): - r"""Set allow_zero_in_degree flag.""" - self.allow_zero_in_degree = set_value - - def reset_parameters(self): - r"""Reinitialize learnable parameters.""" - gain = nn.init.calculate_gain("relu") - nn.init.xavier_normal_(self.lin_src.weight, gain=gain) - nn.init.xavier_normal_(self.lin_dst.weight, gain=gain) - - nn.init.xavier_normal_( - self.attn_weights.view(-1, self.num_heads, self.out_feats), gain=gain - ) - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - - if self.lin_res is not None: - self.lin_res.reset_parameters() - - def forward( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - nfeat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], - efeat: Optional[torch.Tensor] = None, - max_in_degree: Optional[int] = None, - deterministic_dgrad: bool = False, - deterministic_wgrad: bool = False, - ) -> torch.Tensor: - r"""Forward computation. - - Parameters - ---------- - graph : DGLGraph or SparseGraph - The graph. - nfeat : torch.Tensor - Input features of shape :math:`(N, D_{in})`. - efeat: torch.Tensor, optional - Optional edge features. - max_in_degree : int - Maximum in-degree of destination nodes. When :attr:`g` is generated - from a neighbor sampler, the value should be set to the corresponding - :attr:`fanout`. 
This option is used to invoke the MFG-variant of - cugraph-ops kernel. - deterministic_dgrad : bool, default=False - Optional flag indicating whether the feature gradients - are computed deterministically using a dedicated workspace buffer. - deterministic_wgrad: bool, default=False - Optional flag indicating whether the weight gradients - are computed deterministically using a dedicated workspace buffer. - - Returns - ------- - torch.Tensor - The output feature of shape :math:`(N, H, D_{out})` where - :math:`H` is the number of heads, and :math:`D_{out}` is size of - output feature. - """ - - if isinstance(g, dgl.DGLHeteroGraph): - if not self.allow_zero_in_degree: - if (g.in_degrees() == 0).any(): - raise dgl.base.DGLError( - "There are 0-in-degree nodes in the graph, " - "output for those nodes will be invalid. " - "This is harmful for some applications, " - "causing silent performance regression. " - "Adding self-loop on the input graph by " - "calling `g = dgl.add_self_loop(g)` will resolve " - "the issue. Setting ``allow_zero_in_degree`` " - "to be `True` when constructing this module will " - "suppress the check and let the code run." - ) - - nfeat_bipartite = isinstance(nfeat, (list, tuple)) - graph_bipartite = nfeat_bipartite or self.share_weights is False - - _graph = self.get_cugraph_ops_CSC( - g, is_bipartite=graph_bipartite, max_in_degree=max_in_degree - ) - if deterministic_dgrad: - _graph.add_reverse_graph() - - if nfeat_bipartite: - nfeat = (self.feat_drop(nfeat[0]), self.feat_drop(nfeat[1])) - nfeat_dst_orig = nfeat[1] - else: - nfeat = self.feat_drop(nfeat) - nfeat_dst_orig = nfeat[: g.num_dst_nodes()] - - if efeat is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_feats must be set to " - f"accept edge features." 
- ) - efeat = self.lin_edge(efeat) - - if nfeat_bipartite: - nfeat = (self.lin_src(nfeat[0]), self.lin_dst(nfeat[1])) - elif graph_bipartite: - nfeat = (self.lin_src(nfeat), self.lin_dst(nfeat[: g.num_dst_nodes()])) - else: - nfeat = self.lin_src(nfeat) - - out = ops_torch.operators.mha_gat_v2_n2n( - nfeat, - self.attn_weights, - _graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=efeat, - deterministic_dgrad=deterministic_dgrad, - deterministic_wgrad=deterministic_wgrad, - )[: g.num_dst_nodes()] - - if self.concat: - out = out.view(-1, self.num_heads, self.out_feats) - - if self.residual: - res = self.lin_res(nfeat_dst_orig).view(-1, self.num_heads, self.out_feats) - if not self.concat: - res = res.mean(dim=1) - out = out + res - - return out diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py deleted file mode 100644 index 5c4b5dea441..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import math -from typing import Optional, Union - -from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph -from cugraph.utilities.utils import import_optional - -dgl = import_optional("dgl") -torch = import_optional("torch") -nn = import_optional("torch.nn") -ops_torch = import_optional("pylibcugraphops.pytorch") - - -class RelGraphConv(BaseConv): - r"""An accelerated relational graph convolution layer from `Modeling - Relational Data with Graph Convolutional Networks - `__, with the sparse aggregation - accelerated by cugraph-ops. - - Parameters - ---------- - in_feats : int - Input feature size. - out_feats : int - Output feature size. - num_rels : int - Number of relations. - regularizer : str, optional - Which weight regularizer to use ("basis" or ``None``): - - "basis" is for basis-decomposition. - - ``None`` applies no regularization. - Default: ``None``. - num_bases : int, optional - Number of bases. It comes into effect when a regularizer is applied. - Default: ``None``. - bias : bool, optional - True if bias is added. Default: ``True``. - self_loop : bool, optional - True to include self loop message. Default: ``True``. - dropout : float, optional - Dropout rate. Default: ``0.0``. - apply_norm : bool, optional - True to normalize aggregation output by the in-degree of the destination - node per edge type, i.e. :math:`|\mathcal{N}^r_i|`. Default: ``True``. - - Examples - -------- - >>> import dgl - >>> import torch - >>> from cugraph_dgl.nn import RelGraphConv - ... - >>> device = 'cuda' - >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])).to(device) - >>> feat = torch.ones(6, 10).to(device) - >>> conv = RelGraphConv( - ... 
10, 2, 3, regularizer='basis', num_bases=2).to(device) - >>> etypes = torch.tensor([0,1,2,0,1,2]).to(device) - >>> res = conv(g, feat, etypes) - >>> res - tensor([[-1.7774, -2.0184], - [-1.4335, -2.3758], - [-1.7774, -2.0184], - [-0.4698, -3.0876], - [-1.4335, -2.3758], - [-1.4331, -2.3295]], device='cuda:0', grad_fn=) - """ - - def __init__( - self, - in_feats: int, - out_feats: int, - num_rels: int, - regularizer: Optional[str] = None, - num_bases: Optional[int] = None, - bias: bool = True, - self_loop: bool = True, - dropout: float = 0.0, - apply_norm: bool = False, - ): - super().__init__() - self.in_feats = in_feats - self.out_feats = out_feats - self.num_rels = num_rels - self.apply_norm = apply_norm - self.dropout = nn.Dropout(dropout) - - dim_self_loop = 1 if self_loop else 0 - self.self_loop = self_loop - if regularizer is None: - self.W = nn.Parameter( - torch.empty(num_rels + dim_self_loop, in_feats, out_feats) - ) - self.coeff = None - elif regularizer == "basis": - if num_bases is None: - raise ValueError('Missing "num_bases" for basis regularization.') - self.W = nn.Parameter( - torch.empty(num_bases + dim_self_loop, in_feats, out_feats) - ) - self.coeff = nn.Parameter(torch.empty(num_rels, num_bases)) - self.num_bases = num_bases - else: - raise ValueError( - f"Supported regularizer options: 'basis' or None, but got " - f"'{regularizer}'." 
- ) - self.regularizer = regularizer - - if bias: - self.bias = nn.Parameter(torch.empty(out_feats)) - else: - self.register_parameter("bias", None) - - self.reset_parameters() - - def reset_parameters(self): - r"""Reinitialize learnable parameters.""" - bound = 1 / math.sqrt(self.in_feats) - end = -1 if self.self_loop else None - nn.init.uniform_(self.W[:end], -bound, bound) - if self.regularizer == "basis": - nn.init.xavier_uniform_(self.coeff, gain=nn.init.calculate_gain("relu")) - if self.self_loop: - nn.init.xavier_uniform_(self.W[-1], nn.init.calculate_gain("relu")) - if self.bias is not None: - nn.init.zeros_(self.bias) - - def forward( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - feat: torch.Tensor, - etypes: torch.Tensor, - max_in_degree: Optional[int] = None, - ) -> torch.Tensor: - r"""Forward computation. - - Parameters - ---------- - g : DGLGraph - The graph. - feat : torch.Tensor - A 2D tensor of node features. Shape: :math:`(|V|, D_{in})`. - etypes : torch.Tensor - A 1D integer tensor of edge types. Shape: :math:`(|E|,)`. - Note that cugraph-ops only accepts edge type tensors in int32, - so any input of other integer types will be casted into int32, - thus introducing some overhead. Pass in int32 tensors directly - for best performance. - max_in_degree : int - Maximum in-degree of destination nodes. When :attr:`g` is generated - from a neighbor sampler, the value should be set to the corresponding - :attr:`fanout`. This option is used to invoke the MFG-variant of - cugraph-ops kernel. - - Returns - ------- - torch.Tensor - New node features. Shape: :math:`(|V|, D_{out})`. 
- """ - _graph = self.get_cugraph_ops_HeteroCSC( - g, - num_edge_types=self.num_rels, - etypes=etypes, - is_bipartite=False, - max_in_degree=max_in_degree, - ) - - h = ops_torch.operators.agg_hg_basis_n2n_post( - feat, - self.coeff, - _graph, - concat_own=self.self_loop, - norm_by_out_degree=self.apply_norm, - )[: g.num_dst_nodes()] - h = h @ self.W.view(-1, self.out_feats) - if self.bias is not None: - h = h + self.bias - h = self.dropout(h) - - return h diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py deleted file mode 100644 index b6198903766..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Union - -from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph -from cugraph.utilities.utils import import_optional - -dgl = import_optional("dgl") -torch = import_optional("torch") -nn = import_optional("torch.nn") -ops_torch = import_optional("pylibcugraphops.pytorch") - - -class SAGEConv(BaseConv): - r"""An accelerated GraphSAGE layer from `Inductive Representation Learning - on Large Graphs `, with the sparse - aggregation accelerated by cugraph-ops. - - Parameters - ---------- - in_feats : int or tuple - Input feature size. If a scalar is given, the source and destination - nodes are required to be the same. 
- out_feats : int - Output feature size. - aggregator_type : str - Aggregator type to use ("mean", "sum", "min", "max", "pool", "gcn"). - feat_drop : float - Dropout rate on features, default: ``0``. - bias : bool - If True, adds a learnable bias to the output. Default: ``True``. - - Examples - -------- - >>> import dgl - >>> import torch - >>> from cugraph_dgl.nn import SAGEConv - ... - >>> device = 'cuda' - >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])).to(device) - >>> g = dgl.add_self_loop(g) - >>> feat = torch.ones(6, 10).to(device) - >>> conv = SAGEConv(10, 2, 'mean').to(device) - >>> res = conv(g, feat) - >>> res - tensor([[-1.1690, 0.1952], - [-1.1690, 0.1952], - [-1.1690, 0.1952], - [-1.1690, 0.1952], - [-1.1690, 0.1952], - [-1.1690, 0.1952]], device='cuda:0', grad_fn=) - """ - valid_aggr_types = {"mean", "sum", "min", "max", "pool", "gcn"} - - def __init__( - self, - in_feats: Union[int, tuple[int, int]], - out_feats: int, - aggregator_type: str = "mean", - feat_drop: float = 0.0, - bias: bool = True, - ): - super().__init__() - - if aggregator_type not in self.valid_aggr_types: - raise ValueError( - f"Invalid aggregator_type. Must be one of {self.valid_aggr_types}. " - f"But got '{aggregator_type}' instead." 
- ) - - self.aggregator_type = aggregator_type - self._aggr = aggregator_type - self.in_feats = in_feats - self.out_feats = out_feats - self.in_feats_src, self.in_feats_dst = dgl.utils.expand_as_pair(in_feats) - self.feat_drop = nn.Dropout(feat_drop) - - if self.aggregator_type == "gcn": - self._aggr = "mean" - self.lin = nn.Linear(self.in_feats_src, out_feats, bias=bias) - else: - self.lin = nn.Linear( - self.in_feats_src + self.in_feats_dst, out_feats, bias=bias - ) - - if self.aggregator_type == "pool": - self._aggr = "max" - self.pre_lin = nn.Linear(self.in_feats_src, self.in_feats_src) - else: - self.register_parameter("pre_lin", None) - - self.reset_parameters() - - def reset_parameters(self): - r"""Reinitialize learnable parameters.""" - self.lin.reset_parameters() - if self.pre_lin is not None: - self.pre_lin.reset_parameters() - - def forward( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - feat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], - max_in_degree: Optional[int] = None, - ) -> torch.Tensor: - r"""Forward computation. - - Parameters - ---------- - g : DGLGraph or SparseGraph - The graph. - feat : torch.Tensor or tuple - Node features. Shape: :math:`(|V|, D_{in})`. - max_in_degree : int - Maximum in-degree of destination nodes. When :attr:`g` is generated - from a neighbor sampler, the value should be set to the corresponding - :attr:`fanout`. This option is used to invoke the MFG-variant of - cugraph-ops kernel. - - Returns - ------- - torch.Tensor - Output node features. Shape: :math:`(|V|, D_{out})`. 
- """ - feat_bipartite = isinstance(feat, (list, tuple)) - graph_bipartite = feat_bipartite or self.aggregator_type == "pool" - - _graph = self.get_cugraph_ops_CSC( - g, is_bipartite=graph_bipartite, max_in_degree=max_in_degree - ) - - if feat_bipartite: - feat = (self.feat_drop(feat[0]), self.feat_drop(feat[1])) - else: - feat = self.feat_drop(feat) - - if self.aggregator_type == "pool": - if feat_bipartite: - feat = (self.pre_lin(feat[0]).relu(), feat[1]) - else: - feat = (self.pre_lin(feat).relu(), feat[: g.num_dst_nodes()]) - # force ctx.needs_input_grad=True in cugraph-ops autograd function - feat[0].requires_grad_() - feat[1].requires_grad_() - - out = ops_torch.operators.agg_concat_n2n(feat, _graph, self._aggr)[ - : g.num_dst_nodes() - ] - - if self.aggregator_type == "gcn": - out = out[:, : self.in_feats_src] - - out = self.lin(out) - - return out diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py deleted file mode 100644 index e77556fb76f..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Optional, Union - -from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph -from cugraph.utilities.utils import import_optional - -dgl = import_optional("dgl") -torch = import_optional("torch") -nn = import_optional("torch.nn") -ops_torch = import_optional("pylibcugraphops.pytorch") - - -class TransformerConv(BaseConv): - r"""The graph transformer layer from the `"Masked Label Prediction: - Unified Message Passing Model for Semi-Supervised Classification" - `_ paper. - - Parameters - ---------- - in_node_feats : int or pair of ints - Input feature size. A pair denotes feature sizes of source and - destination nodes. - out_node_feats : int - Output feature size. - num_heads : int - Number of multi-head-attentions. - concat : bool, optional - If False, the multi-head attentions are averaged instead of concatenated. - Default: ``True``. - beta : bool, optional - If True, use a gated residual connection. Default: ``True``. - edge_feats: int, optional - Edge feature size. Default: ``None``. - bias: bool, optional - If True, learns a bias term. Default: ``True``. - root_weight: bool, optional - If False, will skip to learn a root weight matrix. Default: ``True``. 
- """ - - def __init__( - self, - in_node_feats: Union[int, tuple[int, int]], - out_node_feats: int, - num_heads: int, - concat: bool = True, - beta: bool = False, - edge_feats: Optional[int] = None, - bias: bool = True, - root_weight: bool = True, - ): - super().__init__() - - self.in_node_feats = in_node_feats - self.out_node_feats = out_node_feats - self.num_heads = num_heads - self.concat = concat - self.beta = beta - self.edge_feats = edge_feats - self.bias = bias - self.root_weight = root_weight - - if isinstance(in_node_feats, int): - in_node_feats = (in_node_feats, in_node_feats) - - self.lin_key = nn.Linear(in_node_feats[0], num_heads * out_node_feats) - self.lin_query = nn.Linear(in_node_feats[1], num_heads * out_node_feats) - self.lin_value = nn.Linear(in_node_feats[0], num_heads * out_node_feats) - - if edge_feats is not None: - self.lin_edge = nn.Linear( - edge_feats, num_heads * out_node_feats, bias=False - ) - else: - self.lin_edge = self.register_parameter("lin_edge", None) - - if concat: - self.lin_skip = nn.Linear( - in_node_feats[1], num_heads * out_node_feats, bias=bias - ) - if self.beta: - self.lin_beta = nn.Linear(3 * num_heads * out_node_feats, 1, bias=bias) - else: - self.lin_beta = self.register_parameter("lin_beta", None) - else: - self.lin_skip = nn.Linear(in_node_feats[1], out_node_feats, bias=bias) - if self.beta: - self.lin_beta = nn.Linear(3 * out_node_feats, 1, bias=False) - else: - self.lin_beta = self.register_parameter("lin_beta", None) - - self.reset_parameters() - - def reset_parameters(self): - self.lin_key.reset_parameters() - self.lin_query.reset_parameters() - self.lin_value.reset_parameters() - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - if self.lin_skip is not None: - self.lin_skip.reset_parameters() - if self.lin_beta is not None: - self.lin_beta.reset_parameters() - - def forward( - self, - g: Union[SparseGraph, dgl.DGLHeteroGraph], - nfeat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], 
- efeat: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - """Forward computation. - - Parameters - ---------- - g: DGLGraph - The graph. - nfeat: torch.Tensor or a pair of torch.Tensor - Node feature tensor. A pair denotes features for source and - destination nodes, respectively. - efeat: torch.Tensor, optional - Edge feature tensor. Default: ``None``. - """ - feat_bipartite = isinstance(nfeat, (list, tuple)) - if not feat_bipartite: - nfeat = (nfeat, nfeat) - - _graph = self.get_cugraph_ops_CSC(g, is_bipartite=True) - - query = self.lin_query(nfeat[1][: g.num_dst_nodes()]) - key = self.lin_key(nfeat[0]) - value = self.lin_value(nfeat[0]) - - if efeat is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_feats must be set to allow " - f"edge features." - ) - efeat = self.lin_edge(efeat) - - out = ops_torch.operators.mha_simple_n2n( - key_emb=key, - query_emb=query, - value_emb=value, - graph=_graph, - num_heads=self.num_heads, - concat_heads=self.concat, - edge_emb=efeat, - norm_by_dim=True, - score_bias=None, - )[: g.num_dst_nodes()] - - if self.root_weight: - res = self.lin_skip(nfeat[1][: g.num_dst_nodes()]) - if self.lin_beta is not None: - beta = self.lin_beta(torch.cat([out, res, out - res], dim=-1)) - beta = beta.sigmoid() - out = beta * res + (1 - beta) * out - else: - out = out + res - - return out diff --git a/python/cugraph-dgl/cugraph_dgl/tests/__init__.py b/python/cugraph-dgl/cugraph_dgl/tests/__init__.py deleted file mode 100644 index 1144e9bab3f..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/cugraph-dgl/cugraph_dgl/tests/conftest.py b/python/cugraph-dgl/cugraph_dgl/tests/conftest.py deleted file mode 100644 index ee1183f5cd1..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/conftest.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -import dgl -import torch - -from cugraph.testing.mg_utils import ( - start_dask_client, - stop_dask_client, -) - - -@pytest.fixture(scope="module") -def dask_client(): - # start_dask_client will check for the SCHEDULER_FILE and - # DASK_WORKER_DEVICES env vars and use them when creating a client if - # set. start_dask_client will also initialize the Comms singleton. 
- dask_client, dask_cluster = start_dask_client( - dask_worker_devices="0", protocol="tcp" - ) - - yield dask_client - - stop_dask_client(dask_client, dask_cluster) - - -class SparseGraphData1: - size = (6, 5) - nnz = 6 - src_ids = torch.IntTensor([0, 1, 2, 3, 2, 5]).cuda() - dst_ids = torch.IntTensor([1, 2, 3, 4, 0, 3]).cuda() - values = torch.IntTensor([10, 20, 30, 40, 50, 60]).cuda() - - # CSR - src_ids_sorted_by_src = torch.IntTensor([0, 1, 2, 2, 3, 5]).cuda() - dst_ids_sorted_by_src = torch.IntTensor([1, 2, 0, 3, 4, 3]).cuda() - csrc_ids = torch.IntTensor([0, 1, 2, 4, 5, 5, 6]).cuda() - values_csr = torch.IntTensor([10, 20, 50, 30, 40, 60]).cuda() - - # CSC - src_ids_sorted_by_dst = torch.IntTensor([2, 0, 1, 5, 2, 3]).cuda() - dst_ids_sorted_by_dst = torch.IntTensor([0, 1, 2, 3, 3, 4]).cuda() - cdst_ids = torch.IntTensor([0, 1, 2, 3, 5, 6]).cuda() - values_csc = torch.IntTensor([50, 10, 20, 60, 30, 40]).cuda() - - -@pytest.fixture -def sparse_graph_1(): - return SparseGraphData1() - - -@pytest.fixture -def dgl_graph_1(): - src = torch.tensor([0, 1, 0, 2, 3, 0, 4, 0, 5, 0, 6, 7, 0, 8, 9]) - dst = torch.tensor([1, 9, 2, 9, 9, 4, 9, 5, 9, 6, 9, 9, 8, 9, 0]) - return dgl.graph((src, dst)) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader.py deleted file mode 100644 index e2542657de4..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -try: - import cugraph_dgl -except ModuleNotFoundError: - pytest.skip("cugraph_dgl not available", allow_module_level=True) - -import dgl -import torch as th -from cugraph_dgl import cugraph_storage_from_heterograph -import tempfile -import numpy as np - - -def sample_dgl_graphs(g, train_nid, fanouts): - # Single fanout to match cugraph - sampler = dgl.dataloading.NeighborSampler(fanouts) - dataloader = dgl.dataloading.DataLoader( - g, - train_nid, - sampler, - batch_size=1, - shuffle=False, - drop_last=False, - num_workers=0, - ) - - dgl_output = {} - for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader): - dgl_output[batch_id] = { - "input_nodes": input_nodes, - "output_nodes": output_nodes, - "blocks": blocks, - } - return dgl_output - - -def sample_cugraph_dgl_graphs(cugraph_gs, train_nid, fanouts): - sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts) - tempdir_object = tempfile.TemporaryDirectory() - sampling_output_dir = tempdir_object - dataloader = cugraph_dgl.dataloading.DaskDataLoader( - cugraph_gs, - train_nid, - sampler, - batch_size=1, - sampling_output_dir=sampling_output_dir.name, - drop_last=False, - shuffle=False, - ) - - cugraph_dgl_output = {} - for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader): - cugraph_dgl_output[batch_id] = { - "input_nodes": input_nodes, - "output_nodes": output_nodes, - "blocks": blocks, - } - return cugraph_dgl_output - - -def test_same_heterograph_results(): - single_gpu = True - data_dict = { - ("B", "BA", "A"): ([1, 2, 3, 4, 5, 6, 7, 8], 
[0, 0, 0, 0, 1, 1, 1, 1]), - ("C", "CA", "A"): ([1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1]), - } - train_nid = {"A": th.tensor([0])} - # Create a heterograph with 3 node types and 3 edges types. - dgl_g = dgl.heterograph(data_dict) - cugraph_gs = cugraph_storage_from_heterograph(dgl_g, single_gpu=single_gpu) - - dgl_output = sample_dgl_graphs(dgl_g, train_nid, [{"BA": 1, "CA": 1}]) - cugraph_output = sample_cugraph_dgl_graphs(cugraph_gs, train_nid, [2]) - - cugraph_output_nodes = cugraph_output[0]["output_nodes"]["A"].cpu().numpy() - dgl_output_nodes = dgl_output[0]["output_nodes"]["A"].cpu().numpy() - np.testing.assert_array_equal(cugraph_output_nodes, dgl_output_nodes) - assert ( - dgl_output[0]["blocks"][0].num_edges() - == cugraph_output[0]["blocks"][0].num_edges() - ) - assert ( - dgl_output[0]["blocks"][0].num_dst_nodes() - == cugraph_output[0]["blocks"][0].num_dst_nodes() - ) - - -def test_same_homogeneousgraph_results(): - single_gpu = True - train_nid = th.tensor([1]) - # Create a heterograph with 3 node types and 3 edges types. 
- dgl_g = dgl.graph(([1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1])) - cugraph_gs = cugraph_storage_from_heterograph(dgl_g, single_gpu=single_gpu) - - dgl_output = sample_dgl_graphs(dgl_g, train_nid, [2]) - cugraph_output = sample_cugraph_dgl_graphs(cugraph_gs, train_nid, [2]) - - cugraph_output_nodes = cugraph_output[0]["output_nodes"].cpu().numpy() - dgl_output_nodes = dgl_output[0]["output_nodes"].cpu().numpy() - np.testing.assert_array_equal(cugraph_output_nodes, dgl_output_nodes) - assert ( - dgl_output[0]["blocks"][0].num_dst_nodes() - == cugraph_output[0]["blocks"][0].num_dst_nodes() - ) - assert ( - dgl_output[0]["blocks"][0].num_edges() - == cugraph_output[0]["blocks"][0].num_edges() - ) - - -def test_heterograph_multi_block_results(): - data_dict = { - ("B", "BA", "A"): ([1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1]), - ("C", "CA", "A"): ([1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1]), - ("A", "AA", "A"): ([1], [0]), - } - dgl_g = dgl.heterograph(data_dict) - cugraph_g = cugraph_dgl.cugraph_storage_from_heterograph(dgl_g, single_gpu=True) - train_nid = {"A": th.tensor([0])} - cugraph_dgl_output = sample_cugraph_dgl_graphs(cugraph_g, train_nid, [10, 10]) - assert ( - cugraph_dgl_output[0]["blocks"][0].num_dst_nodes() - == cugraph_dgl_output[0]["blocks"][1].num_src_nodes() - ) - - -def test_homogenousgraph_multi_block_results(): - dgl_g = dgl.graph(data=([1, 2, 2, 3, 4, 5], [0, 0, 1, 2, 2, 3])) - cugraph_g = cugraph_dgl.cugraph_storage_from_heterograph(dgl_g, single_gpu=True) - train_nid = th.tensor([0]) - cugraph_dgl_output = sample_cugraph_dgl_graphs(cugraph_g, train_nid, [2, 2, 2]) - assert ( - cugraph_dgl_output[0]["blocks"][0].num_dst_nodes() - == cugraph_dgl_output[0]["blocks"][1].num_src_nodes() - ) - assert ( - cugraph_dgl_output[0]["blocks"][1].num_dst_nodes() - == cugraph_dgl_output[0]["blocks"][2].num_src_nodes() - ) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader_mg.py 
b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader_mg.py deleted file mode 100644 index d49e1293e77..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader_mg.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import pytest - -try: - import cugraph_dgl -except ModuleNotFoundError: - pytest.skip("cugraph_dgl not available", allow_module_level=True) - -import dgl -import torch as th -from cugraph_dgl import cugraph_storage_from_heterograph -import tempfile -import numpy as np - - -def sample_dgl_graphs(g, train_nid, fanouts): - # Single fanout to match cugraph - sampler = dgl.dataloading.NeighborSampler(fanouts) - dataloader = dgl.dataloading.DataLoader( - g, - train_nid, - sampler, - batch_size=1, - shuffle=False, - drop_last=False, - num_workers=0, - ) - - dgl_output = {} - for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader): - dgl_output[batch_id] = { - "input_nodes": input_nodes, - "output_nodes": output_nodes, - "blocks": blocks, - } - return dgl_output - - -def sample_cugraph_dgl_graphs(cugraph_gs, train_nid, fanouts): - sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts) - tempdir_object = tempfile.TemporaryDirectory() - sampling_output_dir = tempdir_object - dataloader = cugraph_dgl.dataloading.DaskDataLoader( - cugraph_gs, - train_nid, - sampler, - batch_size=1, - 
sampling_output_dir=sampling_output_dir.name, - drop_last=False, - shuffle=False, - ) - - cugraph_dgl_output = {} - for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader): - cugraph_dgl_output[batch_id] = { - "input_nodes": input_nodes, - "output_nodes": output_nodes, - "blocks": blocks, - } - return cugraph_dgl_output - - -def test_same_heterograph_results(dask_client): - single_gpu = False - data_dict = { - ("B", "BA", "A"): ([1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1]), - ("C", "CA", "A"): ([1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1]), - } - train_nid = {"A": th.tensor([0])} - # Create a heterograph with 3 node types and 3 edges types. - dgl_g = dgl.heterograph(data_dict) - cugraph_gs = cugraph_storage_from_heterograph(dgl_g, single_gpu=single_gpu) - - dgl_output = sample_dgl_graphs(dgl_g, train_nid, [{"BA": 1, "CA": 1}]) - cugraph_output = sample_cugraph_dgl_graphs(cugraph_gs, train_nid, [2]) - - cugraph_output_nodes = cugraph_output[0]["output_nodes"]["A"].cpu().numpy() - dgl_output_nodes = dgl_output[0]["output_nodes"]["A"].cpu().numpy() - np.testing.assert_array_equal(cugraph_output_nodes, dgl_output_nodes) - assert ( - dgl_output[0]["blocks"][0].num_edges() - == cugraph_output[0]["blocks"][0].num_edges() - ) - assert ( - dgl_output[0]["blocks"][0].num_dst_nodes() - == cugraph_output[0]["blocks"][0].num_dst_nodes() - ) - - -def test_same_homogeneousgraph_results(dask_client): - single_gpu = False - train_nid = th.tensor([1]) - # Create a heterograph with 3 node types and 3 edges types. 
- dgl_g = dgl.graph(([1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1])) - cugraph_gs = cugraph_storage_from_heterograph(dgl_g, single_gpu=single_gpu) - - dgl_output = sample_dgl_graphs(dgl_g, train_nid, [2]) - cugraph_output = sample_cugraph_dgl_graphs(cugraph_gs, train_nid, [2]) - - cugraph_output_nodes = cugraph_output[0]["output_nodes"].cpu().numpy() - dgl_output_nodes = dgl_output[0]["output_nodes"].cpu().numpy() - np.testing.assert_array_equal(cugraph_output_nodes, dgl_output_nodes) - assert ( - dgl_output[0]["blocks"][0].num_dst_nodes() - == cugraph_output[0]["blocks"][0].num_dst_nodes() - ) - assert ( - dgl_output[0]["blocks"][0].num_edges() - == cugraph_output[0]["blocks"][0].num_edges() - ) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py deleted file mode 100644 index 419ec7790a9..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py +++ /dev/null @@ -1,175 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import cugraph_dgl.dataloading -import pytest - -import cugraph_dgl - -from cugraph.datasets import karate -from cugraph.utilities.utils import import_optional, MissingModule - -import numpy as np - -torch = import_optional("torch") -dgl = import_optional("dgl") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") -def test_dataloader_basic_homogeneous(): - graph = cugraph_dgl.Graph(is_multi_gpu=False) - - num_nodes = karate.number_of_nodes() - graph.add_nodes(num_nodes, data={"z": torch.arange(num_nodes)}) - - edf = karate.get_edgelist() - graph.add_edges( - u=edf["src"], v=edf["dst"], data={"q": torch.arange(karate.number_of_edges())} - ) - - sampler = cugraph_dgl.dataloading.NeighborSampler([5, 5, 5]) - loader = cugraph_dgl.dataloading.FutureDataLoader( - graph, torch.arange(num_nodes), sampler, batch_size=2 - ) - - for in_t, out_t, blocks in loader: - assert len(blocks) == 3 - assert len(out_t) <= 2 - - -def sample_dgl_graphs(g, train_nid, fanouts, batch_size=1, prob_attr=None): - # Single fanout to match cugraph - sampler = dgl.dataloading.NeighborSampler( - fanouts, - prob=prob_attr, - ) - dataloader = dgl.dataloading.DataLoader( - g, - train_nid, - sampler, - batch_size=batch_size, - shuffle=False, - drop_last=False, - num_workers=0, - ) - - dgl_output = {} - for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader): - dgl_output[batch_id] = { - "input_nodes": input_nodes, - "output_nodes": output_nodes, - "blocks": blocks, - } - return dgl_output - - -def sample_cugraph_dgl_graphs( - cugraph_g, train_nid, fanouts, batch_size=1, prob_attr=None -): - sampler = cugraph_dgl.dataloading.NeighborSampler( - fanouts, - prob=prob_attr, - ) - - dataloader = cugraph_dgl.dataloading.FutureDataLoader( - cugraph_g, - train_nid, - sampler, - batch_size=batch_size, - drop_last=False, - shuffle=False, - ) - - cugraph_dgl_output = {} - 
for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader): - cugraph_dgl_output[batch_id] = { - "input_nodes": input_nodes, - "output_nodes": output_nodes, - "blocks": blocks, - } - return cugraph_dgl_output - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") -@pytest.mark.parametrize("ix", [[1], [1, 0]]) -@pytest.mark.parametrize("batch_size", [1, 2]) -def test_same_homogeneousgraph_results(ix, batch_size): - src = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]) - dst = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1]) - - train_nid = torch.tensor(ix) - # Create a heterograph with 3 node types and 3 edges types. - dgl_g = dgl.graph((src, dst)) - - cugraph_g = cugraph_dgl.Graph(is_multi_gpu=False) - cugraph_g.add_nodes(9) - cugraph_g.add_edges(u=src, v=dst) - - dgl_output = sample_dgl_graphs(dgl_g, train_nid, [2], batch_size=batch_size) - cugraph_output = sample_cugraph_dgl_graphs(cugraph_g, train_nid, [2], batch_size) - - cugraph_output_nodes = cugraph_output[0]["output_nodes"].cpu().numpy() - dgl_output_nodes = dgl_output[0]["output_nodes"].cpu().numpy() - - np.testing.assert_array_equal( - np.sort(cugraph_output_nodes), np.sort(dgl_output_nodes) - ) - assert ( - dgl_output[0]["blocks"][0].num_dst_nodes() - == cugraph_output[0]["blocks"][0].num_dst_nodes() - ) - assert ( - dgl_output[0]["blocks"][0].num_edges() - == cugraph_output[0]["blocks"][0].num_edges() - ) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") -def test_dataloader_biased_homogeneous(): - src = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]) - dst = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1]) - wgt = torch.tensor([1, 1, 2, 0, 0, 0, 2, 1], dtype=torch.float32) - - train_nid = torch.tensor([0, 1]) - # Create a heterograph with 3 node types and 3 edges types. 
- dgl_g = dgl.graph((src, dst)) - dgl_g.edata["wgt"] = wgt - - cugraph_g = cugraph_dgl.Graph(is_multi_gpu=False) - cugraph_g.add_nodes(9) - cugraph_g.add_edges(u=src, v=dst, data={"wgt": wgt}) - - dgl_output = sample_dgl_graphs(dgl_g, train_nid, [4], batch_size=2, prob_attr="wgt") - cugraph_output = sample_cugraph_dgl_graphs( - cugraph_g, train_nid, [4], batch_size=2, prob_attr="wgt" - ) - - cugraph_output_nodes = cugraph_output[0]["output_nodes"].cpu().numpy() - dgl_output_nodes = dgl_output[0]["output_nodes"].cpu().numpy() - - np.testing.assert_array_equal( - np.sort(cugraph_output_nodes), np.sort(dgl_output_nodes) - ) - assert ( - dgl_output[0]["blocks"][0].num_dst_nodes() - == cugraph_output[0]["blocks"][0].num_dst_nodes() - ) - assert ( - dgl_output[0]["blocks"][0].num_edges() - == cugraph_output[0]["blocks"][0].num_edges() - ) - assert 5 == cugraph_output[0]["blocks"][0].num_edges() diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py deleted file mode 100644 index 061f4fa2077..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py +++ /dev/null @@ -1,254 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -import numpy as np - -import cugraph_dgl - -from cugraph.datasets import karate -from cugraph.utilities.utils import import_optional, MissingModule - -from cugraph.gnn import ( - cugraph_comms_create_unique_id, - cugraph_comms_shutdown, -) - -from cugraph_dgl.tests.utils import init_pytorch_worker - -torch = import_optional("torch") -dgl = import_optional("dgl") - - -def run_test_dataloader_basic_homogeneous(rank, world_size, uid): - init_pytorch_worker(rank, world_size, uid) - - graph = cugraph_dgl.Graph(is_multi_gpu=True) - - num_nodes = karate.number_of_nodes() - graph.add_nodes( - num_nodes, - ) - - edf = karate.get_edgelist() - graph.add_edges( - u=torch.tensor_split(torch.as_tensor(edf["src"], device="cuda"), world_size)[ - rank - ], - v=torch.tensor_split(torch.as_tensor(edf["dst"], device="cuda"), world_size)[ - rank - ], - ) - - sampler = cugraph_dgl.dataloading.NeighborSampler([5, 5, 5]) - loader = cugraph_dgl.dataloading.FutureDataLoader( - graph, - torch.arange(num_nodes), - sampler, - batch_size=2, - use_ddp=True, - ) - - for in_t, out_t, blocks in loader: - assert len(blocks) == 3 - assert len(out_t) <= 2 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") -def test_dataloader_basic_homogeneous(): - uid = cugraph_comms_create_unique_id() - # Limit the number of GPUs this rest is run with - world_size = min(torch.cuda.device_count(), 4) - - torch.multiprocessing.spawn( - run_test_dataloader_basic_homogeneous, - args=( - world_size, - uid, - ), - nprocs=world_size, - ) - - -def sample_dgl_graphs( - g, - train_nid, - fanouts, - batch_size=1, - prob_attr=None, -): - # Single fanout to match cugraph - sampler = dgl.dataloading.NeighborSampler( - fanouts, - prob=prob_attr, - ) - dataloader = dgl.dataloading.DataLoader( - g, - train_nid, - sampler, - batch_size=batch_size, - shuffle=False, - drop_last=False, - 
num_workers=0, - ) - - dgl_output = {} - for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader): - dgl_output[batch_id] = { - "input_nodes": input_nodes, - "output_nodes": output_nodes, - "blocks": blocks, - } - return dgl_output - - -def sample_cugraph_dgl_graphs( - cugraph_g, - train_nid, - fanouts, - batch_size=1, - prob_attr=None, -): - sampler = cugraph_dgl.dataloading.NeighborSampler( - fanouts, - prob=prob_attr, - ) - - dataloader = cugraph_dgl.dataloading.FutureDataLoader( - cugraph_g, - train_nid, - sampler, - batch_size=batch_size, - drop_last=False, - shuffle=False, - ) - - cugraph_dgl_output = {} - for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader): - cugraph_dgl_output[batch_id] = { - "input_nodes": input_nodes, - "output_nodes": output_nodes, - "blocks": blocks, - } - return cugraph_dgl_output - - -def run_test_same_homogeneousgraph_results(rank, world_size, uid, ix, batch_size): - init_pytorch_worker(rank, world_size, uid) - - src = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]) - dst = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1]) - - local_src = torch.tensor_split(src, world_size)[rank] - local_dst = torch.tensor_split(dst, world_size)[rank] - - train_nid = torch.tensor(ix) - # Create a heterograph with 3 node types and 3 edges types. 
- dgl_g = dgl.graph((src, dst)) - - cugraph_g = cugraph_dgl.Graph(is_multi_gpu=True) - cugraph_g.add_nodes(9) - cugraph_g.add_edges(u=local_src, v=local_dst) - - dgl_output = sample_dgl_graphs(dgl_g, train_nid, [2], batch_size=batch_size) - cugraph_output = sample_cugraph_dgl_graphs(cugraph_g, train_nid, [2], batch_size) - - cugraph_output_nodes = cugraph_output[0]["output_nodes"].cpu().numpy() - dgl_output_nodes = dgl_output[0]["output_nodes"].cpu().numpy() - - np.testing.assert_array_equal( - np.sort(cugraph_output_nodes), np.sort(dgl_output_nodes) - ) - assert ( - dgl_output[0]["blocks"][0].num_dst_nodes() - == cugraph_output[0]["blocks"][0].num_dst_nodes() - ) - assert ( - dgl_output[0]["blocks"][0].num_edges() - == cugraph_output[0]["blocks"][0].num_edges() - ) - - cugraph_comms_shutdown() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") -@pytest.mark.parametrize("ix", [[1], [1, 0]]) -@pytest.mark.parametrize("batch_size", [1, 2]) -def test_same_homogeneousgraph_results_mg(ix, batch_size): - uid = cugraph_comms_create_unique_id() - # Limit the number of GPUs this rest is run with - world_size = min(torch.cuda.device_count(), 4) - - torch.multiprocessing.spawn( - run_test_same_homogeneousgraph_results, - args=(world_size, uid, ix, batch_size), - nprocs=world_size, - ) - - -def run_test_dataloader_biased_homogeneous(rank, world_size, uid): - init_pytorch_worker(rank, world_size, uid, True) - - src = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]) + (rank * 9) - dst = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1]) + (rank * 9) - wgt = torch.tensor( - [0.1, 0.1, 0.2, 0, 0, 0, 0.2, 0.1] * world_size, dtype=torch.float32 - ) - - train_nid = torch.tensor([0, 1]) + (rank * 9) - # Create a heterograph with 3 node types and 3 edge types. 
- dgl_g = dgl.graph((src, dst)) - dgl_g.edata["wgt"] = wgt[:8] - - cugraph_g = cugraph_dgl.Graph(is_multi_gpu=True) - cugraph_g.add_nodes(9 * world_size) - cugraph_g.add_edges(u=src, v=dst, data={"wgt": wgt}) - - dgl_output = sample_dgl_graphs(dgl_g, train_nid, [4], batch_size=2, prob_attr="wgt") - cugraph_output = sample_cugraph_dgl_graphs( - cugraph_g, train_nid, [4], batch_size=2, prob_attr="wgt" - ) - - cugraph_output_nodes = cugraph_output[0]["output_nodes"].cpu().numpy() - dgl_output_nodes = dgl_output[0]["output_nodes"].cpu().numpy() - - np.testing.assert_array_equal( - np.sort(cugraph_output_nodes), np.sort(dgl_output_nodes) - ) - assert ( - dgl_output[0]["blocks"][0].num_dst_nodes() - == cugraph_output[0]["blocks"][0].num_dst_nodes() - ) - assert ( - dgl_output[0]["blocks"][0].num_edges() - == cugraph_output[0]["blocks"][0].num_edges() - ) - - assert 5 == cugraph_output[0]["blocks"][0].num_edges() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") -def test_dataloader_biased_homogeneous_mg(): - uid = cugraph_comms_create_unique_id() - # Limit the number of GPUs this test is run with - world_size = torch.cuda.device_count() - - torch.multiprocessing.spawn( - run_test_dataloader_biased_homogeneous, - args=(world_size, uid), - nprocs=world_size, - ) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataset.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataset.py deleted file mode 100644 index 5db443dc0d8..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataset.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -try: - import cugraph_dgl - - del cugraph_dgl -except ModuleNotFoundError: - pytest.skip("cugraph_dgl not available", allow_module_level=True) - -from dgl.dataloading import MultiLayerNeighborSampler -import dgl -import torch -import cudf -import pandas as pd -import cupy as cp -import numpy as np -from cugraph_dgl.dataloading.utils.sampling_helpers import ( - create_homogeneous_sampled_graphs_from_dataframe, -) - - -def get_edge_df_from_homogenous_block(block): - block = block.to("cpu") - src, dst, eid = block.edges("all") - src = block.srcdata[dgl.NID][src] - dst = block.dstdata[dgl.NID][dst] - eid = block.edata[dgl.EID][eid] - df = pd.DataFrame({"src": src, "dst": dst, "eid": eid}) - return df.sort_values(by="eid").reset_index(drop=True) - - -def create_dgl_mfgs(g, seed_nodes, fanout): - sampler = MultiLayerNeighborSampler(fanout) - return sampler.sample_blocks(g, seed_nodes) - - -def create_cugraph_dgl_homogenous_mfgs(dgl_blocks, return_type): - df_ls = [] - unique_vertices_ls = [] - for hop_id, block in enumerate(reversed(dgl_blocks)): - block = block.to("cpu") - src, dst, eid = block.edges("all") - eid = block.edata[dgl.EID][eid] - - og_src = block.srcdata[dgl.NID][src] - og_dst = block.dstdata[dgl.NID][dst] - unique_vertices = pd.concat( - [pd.Series(og_dst.numpy()), pd.Series(og_src.numpy())] - ).drop_duplicates(keep="first") - unique_vertices_ls.append(unique_vertices) - df = cudf.DataFrame( - { - "sources": cp.asarray(src), - "destinations": cp.asarray(dst), - "edge_id": cp.asarray(eid), - } - ) - df["hop_id"] = hop_id - 
df_ls.append(df) - df = cudf.concat(df_ls, ignore_index=True) - df["batch_id"] = 0 - - # Add map column - # to the dataframe - renumberd_map = pd.concat(unique_vertices_ls).drop_duplicates(keep="first").values - offsets = np.asarray([2, 2 + len(renumberd_map)]) - map_ar = np.concatenate([offsets, renumberd_map]) - map_ser = cudf.Series(map_ar) - # Have to reindex cause map_ser can be of larger length than df - df = df.reindex(df.index.union(map_ser.index)) - df["map"] = map_ser - return create_homogeneous_sampled_graphs_from_dataframe( - df, return_type=return_type - )[0] - - -@pytest.mark.parametrize("return_type", ["dgl.Block", "cugraph_dgl.nn.SparseGraph"]) -@pytest.mark.parametrize("seed_node", [3, 4, 5]) -def test_homogeneous_sampled_graphs_from_dataframe(return_type, seed_node): - g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 5])) - fanout = [1, 1, 1] - seed_node = torch.as_tensor([seed_node]) - - dgl_seed_nodes, dgl_output_nodes, dgl_mfgs = create_dgl_mfgs(g, seed_node, fanout) - ( - cugraph_seed_nodes, - cugraph_output_nodes, - cugraph_mfgs, - ) = create_cugraph_dgl_homogenous_mfgs(dgl_mfgs, return_type=return_type) - - np.testing.assert_equal( - cugraph_seed_nodes.cpu().numpy().copy().sort(), - dgl_seed_nodes.cpu().numpy().copy().sort(), - ) - - np.testing.assert_equal( - dgl_output_nodes.cpu().numpy().copy().sort(), - cugraph_output_nodes.cpu().numpy().copy().sort(), - ) - - if return_type == "dgl.Block": - for dgl_block, cugraph_dgl_block in zip(dgl_mfgs, cugraph_mfgs): - dgl_df = get_edge_df_from_homogenous_block(dgl_block) - cugraph_dgl_df = get_edge_df_from_homogenous_block(cugraph_dgl_block) - pd.testing.assert_frame_equal(dgl_df, cugraph_dgl_df) - else: - for dgl_block, cugraph_dgl_graph in zip(dgl_mfgs, cugraph_mfgs): - # Can not verify edge ids as they are not - # preserved in cugraph_dgl.nn.SparseGraph - assert dgl_block.num_src_nodes() == cugraph_dgl_graph.num_src_nodes() - assert dgl_block.num_dst_nodes() == cugraph_dgl_graph.num_dst_nodes() - 
dgl_offsets, dgl_indices, _ = dgl_block.adj_tensors("csc") - cugraph_offsets, cugraph_indices, _ = cugraph_dgl_graph.csc() - assert torch.equal(dgl_offsets.to("cpu"), cugraph_offsets.to("cpu")) - assert torch.equal(dgl_indices.to("cpu"), cugraph_indices.to("cpu")) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatconv.py deleted file mode 100644 index de27efc6329..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatconv.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from cugraph_dgl.nn.conv.base import SparseGraph -from cugraph_dgl.nn import GATConv as CuGraphGATConv - -dgl = pytest.importorskip("dgl", reason="DGL not available") -torch = pytest.importorskip("torch", reason="PyTorch not available") - -ATOL = 1e-6 - - -@pytest.mark.parametrize("mode", ["bipartite", "share_weights", "regular"]) -@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) -@pytest.mark.parametrize("max_in_degree", [None, 8]) -@pytest.mark.parametrize("num_heads", [1, 2, 7]) -@pytest.mark.parametrize("residual", [False, True]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) -def test_gatconv_equality( - dgl_graph_1, - mode, - idx_type, - max_in_degree, - num_heads, - residual, - to_block, - sparse_format, -): - from dgl.nn.pytorch import GATConv - - torch.manual_seed(12345) - device = torch.device("cuda") - g = dgl_graph_1.to(device).astype(idx_type) - - if to_block: - g = dgl.to_block(g) - - size = (g.num_src_nodes(), g.num_dst_nodes()) - - if mode == "bipartite": - in_feats = (10, 3) - nfeat = ( - torch.randn(size[0], in_feats[0]).to(device), - torch.randn(size[1], in_feats[1]).to(device), - ) - elif mode == "share_weights": - in_feats = 5 - nfeat = ( - torch.randn(size[0], in_feats).to(device), - torch.randn(size[1], in_feats).to(device), - ) - else: - in_feats = 7 - nfeat = torch.randn(size[0], in_feats).to(device) - out_feats = 2 - - if sparse_format == "coo": - sg = SparseGraph( - size=size, src_ids=g.edges()[0], dst_ids=g.edges()[1], formats="csc" - ) - elif sparse_format == "csc": - offsets, indices, _ = g.adj_tensors("csc") - sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") - - args = (in_feats, out_feats, num_heads) - kwargs = {"bias": False, "allow_zero_in_degree": True, "residual": residual} - - conv1 = GATConv(*args, **kwargs).to(device) - conv2 = CuGraphGATConv(*args, **kwargs).to(device) - - dim = num_heads * 
out_feats - with torch.no_grad(): - conv2.attn_weights[:dim].copy_(conv1.attn_l.flatten()) - conv2.attn_weights[dim:].copy_(conv1.attn_r.flatten()) - if mode == "bipartite": - conv2.lin_src.weight.copy_(conv1.fc_src.weight) - conv2.lin_dst.weight.copy_(conv1.fc_dst.weight) - else: - conv2.lin.weight.copy_(conv1.fc.weight) - if residual and conv1.has_linear_res: - conv2.lin_res.weight.copy_(conv1.res_fc.weight) - - out1 = conv1(g, nfeat) - if sparse_format is not None: - out2 = conv2(sg, nfeat, max_in_degree=max_in_degree) - else: - out2 = conv2(g, nfeat, max_in_degree=max_in_degree) - - assert torch.allclose(out1, out2, atol=ATOL) - - grad_out1 = torch.randn_like(out1) - grad_out2 = grad_out1.detach().clone() - out1.backward(grad_out1) - out2.backward(grad_out2) - - if mode == "bipartite": - assert torch.allclose( - conv1.fc_src.weight.grad, conv2.lin_src.weight.grad, atol=ATOL - ) - assert torch.allclose( - conv1.fc_dst.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL - ) - else: - assert torch.allclose(conv1.fc.weight.grad, conv2.lin.weight.grad, atol=ATOL) - - if residual and conv1.has_linear_res: - assert torch.allclose( - conv1.res_fc.weight.grad, conv2.lin_res.weight.grad, atol=ATOL - ) - - assert torch.allclose( - torch.cat((conv1.attn_l.grad, conv1.attn_r.grad), dim=0), - conv2.attn_weights.grad.view(2, num_heads, out_feats), - atol=1e-5, # Note: using a loosened tolerance here due to numerical error - ) - - -@pytest.mark.parametrize("bias", [False, True]) -@pytest.mark.parametrize("bipartite", [False, True]) -@pytest.mark.parametrize("concat", [False, True]) -@pytest.mark.parametrize("max_in_degree", [None, 8]) -@pytest.mark.parametrize("num_heads", [1, 2, 7]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("use_edge_feats", [False, True]) -def test_gatconv_edge_feats( - dgl_graph_1, - bias, - bipartite, - concat, - max_in_degree, - num_heads, - to_block, - use_edge_feats, -): - torch.manual_seed(12345) - device = 
torch.device("cuda") - g = dgl_graph_1.to(device) - - if to_block: - g = dgl.to_block(g) - - if bipartite: - in_feats = (10, 3) - nfeat = ( - torch.rand(g.num_src_nodes(), in_feats[0]).to(device), - torch.rand(g.num_dst_nodes(), in_feats[1]).to(device), - ) - else: - in_feats = 10 - nfeat = torch.rand(g.num_src_nodes(), in_feats).to(device) - out_feats = 2 - - if use_edge_feats: - edge_feats = 3 - efeat = torch.rand(g.num_edges(), edge_feats).to(device) - else: - edge_feats = None - efeat = None - - conv = CuGraphGATConv( - in_feats, - out_feats, - num_heads, - concat=concat, - edge_feats=edge_feats, - bias=bias, - allow_zero_in_degree=True, - ).to(device) - out = conv(g, nfeat, efeat=efeat, max_in_degree=max_in_degree) - - grad_out = torch.randn_like(out) - out.backward(grad_out) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatv2conv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatv2conv.py deleted file mode 100644 index 2d26b7fdc28..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatv2conv.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from cugraph_dgl.nn.conv.base import SparseGraph -from cugraph_dgl.nn import GATv2Conv as CuGraphGATv2Conv - -dgl = pytest.importorskip("dgl", reason="DGL not available") -torch = pytest.importorskip("torch", reason="PyTorch not available") - -ATOL = 1e-5 - - -@pytest.mark.parametrize("mode", ["bipartite", "share_weights", "regular"]) -@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) -@pytest.mark.parametrize("max_in_degree", [None, 8]) -@pytest.mark.parametrize("num_heads", [1, 2, 7]) -@pytest.mark.parametrize("residual", [False, True]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) -def test_gatv2conv_equality( - dgl_graph_1, - mode, - idx_type, - max_in_degree, - num_heads, - residual, - to_block, - sparse_format, -): - from dgl.nn.pytorch import GATv2Conv - - torch.manual_seed(12345) - device = torch.device("cuda") - g = dgl_graph_1.to(device).astype(idx_type) - - if to_block: - g = dgl.to_block(g) - - size = (g.num_src_nodes(), g.num_dst_nodes()) - - if mode == "bipartite": - in_feats = (10, 3) - nfeat = ( - torch.randn(size[0], in_feats[0]).to(device), - torch.randn(size[1], in_feats[1]).to(device), - ) - elif mode == "share_weights": - in_feats = 5 - nfeat = ( - torch.randn(size[0], in_feats).to(device), - torch.randn(size[1], in_feats).to(device), - ) - else: - in_feats = 7 - nfeat = torch.randn(size[0], in_feats).to(device) - out_feats = 2 - - if sparse_format == "coo": - sg = SparseGraph( - size=size, src_ids=g.edges()[0], dst_ids=g.edges()[1], formats="csc" - ) - elif sparse_format == "csc": - offsets, indices, _ = g.adj_tensors("csc") - sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") - - args = (in_feats, out_feats, num_heads) - kwargs = { - "bias": False, - "allow_zero_in_degree": True, - "residual": residual, - "share_weights": mode == "share_weights", - } - - conv1 = GATv2Conv(*args, **kwargs).to(device) - conv2 = 
CuGraphGATv2Conv(*args, **kwargs).to(device) - - with torch.no_grad(): - conv2.attn_weights.copy_(conv1.attn.flatten()) - conv2.lin_src.weight.copy_(conv1.fc_src.weight) - conv2.lin_dst.weight.copy_(conv1.fc_dst.weight) - if residual: - conv2.lin_res.weight.copy_(conv1.res_fc.weight) - - out1 = conv1(g, nfeat) - if sparse_format is not None: - out2 = conv2(sg, nfeat, max_in_degree=max_in_degree) - else: - out2 = conv2(g, nfeat, max_in_degree=max_in_degree) - - assert torch.allclose(out1, out2, atol=ATOL) - - grad_out1 = torch.randn_like(out1) - grad_out2 = grad_out1.detach().clone() - out1.backward(grad_out1) - out2.backward(grad_out2) - - assert torch.allclose( - conv1.fc_src.weight.grad, conv2.lin_src.weight.grad, atol=ATOL - ) - assert torch.allclose( - conv1.fc_dst.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL - ) - - if residual: - assert torch.allclose( - conv1.res_fc.weight.grad, conv2.lin_res.weight.grad, atol=ATOL - ) - - assert torch.allclose( - conv1.attn.grad, - conv2.attn_weights.grad.view(1, num_heads, out_feats), - atol=ATOL, - ) - - -@pytest.mark.parametrize("bias", [False, True]) -@pytest.mark.parametrize("bipartite", [False, True]) -@pytest.mark.parametrize("concat", [False, True]) -@pytest.mark.parametrize("max_in_degree", [None, 8]) -@pytest.mark.parametrize("num_heads", [1, 2, 7]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("use_edge_feats", [False, True]) -def test_gatv2conv_edge_feats( - dgl_graph_1, - bias, - bipartite, - concat, - max_in_degree, - num_heads, - to_block, - use_edge_feats, -): - torch.manual_seed(12345) - device = torch.device("cuda") - g = dgl_graph_1.to(device) - - if to_block: - g = dgl.to_block(g) - - if bipartite: - in_feats = (10, 3) - nfeat = ( - torch.rand(g.num_src_nodes(), in_feats[0]).to(device), - torch.rand(g.num_dst_nodes(), in_feats[1]).to(device), - ) - else: - in_feats = 10 - nfeat = torch.rand(g.num_src_nodes(), in_feats).to(device) - out_feats = 2 - - if 
use_edge_feats: - edge_feats = 3 - efeat = torch.rand(g.num_edges(), edge_feats).to(device) - else: - edge_feats = None - efeat = None - - conv = CuGraphGATv2Conv( - in_feats, - out_feats, - num_heads, - concat=concat, - edge_feats=edge_feats, - bias=bias, - allow_zero_in_degree=True, - ).to(device) - out = conv(g, nfeat, efeat=efeat, max_in_degree=max_in_degree) - - grad_out = torch.randn_like(out) - out.backward(grad_out) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_relgraphconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_relgraphconv.py deleted file mode 100644 index b5d3686c609..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_relgraphconv.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from cugraph_dgl.nn.conv.base import SparseGraph -from cugraph_dgl.nn import RelGraphConv as CuGraphRelGraphConv - -dgl = pytest.importorskip("dgl", reason="DGL not available") -torch = pytest.importorskip("torch", reason="PyTorch not available") - -ATOL = 1e-6 - - -@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) -@pytest.mark.parametrize("max_in_degree", [None, 8]) -@pytest.mark.parametrize("num_bases", [1, 2, 5]) -@pytest.mark.parametrize("regularizer", [None, "basis"]) -@pytest.mark.parametrize("self_loop", [False, True]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) -def test_relgraphconv_equality( - dgl_graph_1, - idx_type, - max_in_degree, - num_bases, - regularizer, - self_loop, - to_block, - sparse_format, -): - from dgl.nn.pytorch import RelGraphConv - - torch.manual_seed(12345) - device = torch.device("cuda") - g = dgl_graph_1.to(device).astype(idx_type) - - if to_block: - g = dgl.to_block(g) - - in_feat, out_feat, num_rels = 10, 2, 3 - args = (in_feat, out_feat, num_rels) - kwargs = { - "num_bases": num_bases, - "regularizer": regularizer, - "bias": False, - "self_loop": self_loop, - } - - g.edata[dgl.ETYPE] = torch.randint(num_rels, (g.num_edges(),)).to(device) - size = (g.num_src_nodes(), g.num_dst_nodes()) - feat = torch.rand(g.num_src_nodes(), in_feat).to(device) - - if sparse_format == "coo": - sg = SparseGraph( - size=size, - src_ids=g.edges()[0], - dst_ids=g.edges()[1], - values=g.edata[dgl.ETYPE], - formats="csc", - ) - elif sparse_format == "csc": - offsets, indices, perm = g.adj_tensors("csc") - etypes = g.edata[dgl.ETYPE][perm] - sg = SparseGraph( - size=size, src_ids=indices, cdst_ids=offsets, values=etypes, formats="csc" - ) - - conv1 = RelGraphConv(*args, **kwargs).to(device) - conv2 = CuGraphRelGraphConv(*args, **kwargs, apply_norm=False).to(device) - - with torch.no_grad(): - if self_loop: - conv2.W[:-1].copy_(conv1.linear_r.W) - 
conv2.W[-1].copy_(conv1.loop_weight) - else: - conv2.W.copy_(conv1.linear_r.W) - - if regularizer is not None: - conv2.coeff.copy_(conv1.linear_r.coeff) - - out1 = conv1(g, feat, g.edata[dgl.ETYPE]) - - if sparse_format is not None: - out2 = conv2(sg, feat, sg.values(), max_in_degree=max_in_degree) - else: - out2 = conv2(g, feat, g.edata[dgl.ETYPE], max_in_degree=max_in_degree) - - assert torch.allclose(out1, out2, atol=ATOL) - - grad_out = torch.randn_like(out1) - out1.backward(grad_out) - out2.backward(grad_out) - - end = -1 if self_loop else None - assert torch.allclose(conv1.linear_r.W.grad, conv2.W.grad[:end], atol=ATOL) - - if self_loop: - assert torch.allclose(conv1.loop_weight.grad, conv2.W.grad[-1], atol=ATOL) - - if regularizer is not None: - assert torch.allclose(conv1.linear_r.coeff.grad, conv2.coeff.grad, atol=ATOL) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_sageconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_sageconv.py deleted file mode 100644 index 3f1c2b1b3fe..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_sageconv.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from cugraph_dgl.nn.conv.base import SparseGraph -from cugraph_dgl.nn import SAGEConv as CuGraphSAGEConv - -dgl = pytest.importorskip("dgl", reason="DGL not available") -torch = pytest.importorskip("torch", reason="PyTorch not available") - -ATOL = 1e-6 - - -@pytest.mark.parametrize("aggr", ["mean", "pool"]) -@pytest.mark.parametrize("bias", [False, True]) -@pytest.mark.parametrize("bipartite", [False, True]) -@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) -@pytest.mark.parametrize("max_in_degree", [None, 8]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) -def test_sageconv_equality( - dgl_graph_1, aggr, bias, bipartite, idx_type, max_in_degree, to_block, sparse_format -): - from dgl.nn.pytorch import SAGEConv - - torch.manual_seed(12345) - device = torch.device("cuda") - g = dgl_graph_1.to(device).astype(idx_type) - - if to_block: - g = dgl.to_block(g) - - size = (g.num_src_nodes(), g.num_dst_nodes()) - - if bipartite: - in_feats = (5, 3) - feat = ( - torch.rand(size[0], in_feats[0], requires_grad=True).to(device), - torch.rand(size[1], in_feats[1], requires_grad=True).to(device), - ) - else: - in_feats = 5 - feat = torch.rand(size[0], in_feats).to(device) - out_feats = 2 - - if sparse_format == "coo": - sg = SparseGraph( - size=size, src_ids=g.edges()[0], dst_ids=g.edges()[1], formats="csc" - ) - elif sparse_format == "csc": - offsets, indices, _ = g.adj_tensors("csc") - sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") - - kwargs = {"aggregator_type": aggr, "bias": bias} - conv1 = SAGEConv(in_feats, out_feats, **kwargs).to(device) - conv2 = CuGraphSAGEConv(in_feats, out_feats, **kwargs).to(device) - - in_feats_src = conv2.in_feats_src - with torch.no_grad(): - conv2.lin.weight[:, :in_feats_src].copy_(conv1.fc_neigh.weight) - conv2.lin.weight[:, in_feats_src:].copy_(conv1.fc_self.weight) - if bias: - 
conv2.lin.bias.copy_(conv1.fc_self.bias) - if aggr == "pool": - conv2.pre_lin.weight.copy_(conv1.fc_pool.weight) - conv2.pre_lin.bias.copy_(conv1.fc_pool.bias) - - out1 = conv1(g, feat) - if sparse_format is not None: - out2 = conv2(sg, feat, max_in_degree=max_in_degree) - else: - out2 = conv2(g, feat, max_in_degree=max_in_degree) - assert torch.allclose(out1, out2, atol=ATOL) - - grad_out = torch.randn_like(out1) - out1.backward(grad_out) - out2.backward(grad_out) - assert torch.allclose( - conv1.fc_neigh.weight.grad, - conv2.lin.weight.grad[:, :in_feats_src], - atol=ATOL, - ) - assert torch.allclose( - conv1.fc_self.weight.grad, - conv2.lin.weight.grad[:, in_feats_src:], - atol=ATOL, - ) - if bias: - assert torch.allclose(conv1.fc_self.bias.grad, conv2.lin.bias.grad, atol=ATOL) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_sparsegraph.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_sparsegraph.py deleted file mode 100644 index 09c0df202ff..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_sparsegraph.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from cugraph.utilities.utils import import_optional -from cugraph_dgl.nn import SparseGraph - -torch = import_optional("torch") - - -def test_coo2csc(sparse_graph_1): - data = sparse_graph_1 - - g = SparseGraph( - size=data.size, - src_ids=data.src_ids, - dst_ids=data.dst_ids, - values=data.values, - formats=["csc"], - ) - cdst_ids, src_ids, values = g.csc() - - new = torch.sparse_csc_tensor(cdst_ids, src_ids, values).cuda() - old = torch.sparse_coo_tensor( - torch.vstack((data.src_ids, data.dst_ids)), data.values - ).cuda() - torch.allclose(new.to_dense(), old.to_dense()) - - -def test_csc_input(sparse_graph_1): - data = sparse_graph_1 - - g = SparseGraph( - size=data.size, - src_ids=data.src_ids_sorted_by_dst, - cdst_ids=data.cdst_ids, - values=data.values_csc, - formats=["coo", "csc", "csr"], - ) - src_ids, dst_ids, values = g.coo() - - new = torch.sparse_coo_tensor(torch.vstack((src_ids, dst_ids)), values).cuda() - old = torch.sparse_csc_tensor( - data.cdst_ids, data.src_ids_sorted_by_dst, data.values_csc - ).cuda() - torch.allclose(new.to_dense(), old.to_dense()) - - csrc_ids, dst_ids, values = g.csr() - - new = torch.sparse_csr_tensor(csrc_ids, dst_ids, values).cuda() - torch.allclose(new.to_dense(), old.to_dense()) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_transformerconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_transformerconv.py deleted file mode 100644 index 28d13dedec8..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/nn/test_transformerconv.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from cugraph_dgl.nn.conv.base import SparseGraph -from cugraph_dgl.nn import TransformerConv - -dgl = pytest.importorskip("dgl", reason="DGL not available") -torch = pytest.importorskip("torch", reason="PyTorch not available") - -ATOL = 1e-6 - - -@pytest.mark.parametrize("beta", [False, True]) -@pytest.mark.parametrize("bipartite_node_feats", [False, True]) -@pytest.mark.parametrize("concat", [False, True]) -@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) -@pytest.mark.parametrize("num_heads", [1, 3, 4]) -@pytest.mark.parametrize("to_block", [False, True]) -@pytest.mark.parametrize("use_edge_feats", [False, True]) -@pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) -def test_transformerconv( - dgl_graph_1, - beta, - bipartite_node_feats, - concat, - idx_type, - num_heads, - to_block, - use_edge_feats, - sparse_format, -): - torch.manual_seed(12345) - device = torch.device("cuda") - g = dgl_graph_1.to(device).astype(idx_type) - - if to_block: - g = dgl.to_block(g) - - size = (g.num_src_nodes(), g.num_dst_nodes()) - if sparse_format == "coo": - sg = SparseGraph( - size=size, src_ids=g.edges()[0], dst_ids=g.edges()[1], formats="csc" - ) - elif sparse_format == "csc": - offsets, indices, _ = g.adj_tensors("csc") - sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") - - if bipartite_node_feats: - in_node_feats = (5, 3) - nfeat = ( - torch.rand(g.num_src_nodes(), in_node_feats[0], device=device), - torch.rand(g.num_dst_nodes(), in_node_feats[1], device=device), - ) - else: - in_node_feats 
= 3 - nfeat = torch.rand(g.num_src_nodes(), in_node_feats, device=device) - out_node_feats = 2 - - if use_edge_feats: - edge_feats = 3 - efeat = torch.rand(g.num_edges(), edge_feats, device=device) - else: - edge_feats = None - efeat = None - - conv = TransformerConv( - in_node_feats, - out_node_feats, - num_heads=num_heads, - concat=concat, - beta=beta, - edge_feats=edge_feats, - ).to(device) - - if sparse_format is not None: - out = conv(sg, nfeat, efeat) - else: - out = conv(g, nfeat, efeat) - - grad_out = torch.randn_like(out) - out.backward(grad_out) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_cugraph_storage.py b/python/cugraph-dgl/cugraph_dgl/tests/test_cugraph_storage.py deleted file mode 100644 index 0a99d4d65b7..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_cugraph_storage.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -try: - import cugraph_dgl -except ModuleNotFoundError: - pytest.skip("cugraph_dgl not available", allow_module_level=True) - -from cugraph.utilities.utils import import_optional -import cudf -import numpy as np -from cugraph_dgl import CuGraphStorage -from .utils import assert_same_sampling_len - -th = import_optional("torch") -dgl = import_optional("dgl") - - -@pytest.fixture() -def dgl_graph(): - graph_data = { - ("nt.a", "connects", "nt.b"): ( - th.tensor([0, 1, 2]), - th.tensor([0, 1, 2]), - ), - ("nt.a", "connects", "nt.c"): ( - th.tensor([0, 1, 2]), - th.tensor([0, 1, 2]), - ), - ("nt.c", "connects", "nt.c"): ( - th.tensor([1, 3, 4, 5]), - th.tensor([0, 0, 0, 0]), - ), - } - g = dgl.heterograph(graph_data) - return g - - -def test_cugraphstore_basic_apis(): - - num_nodes_dict = {"drug": 3, "gene": 2, "disease": 1} - # edges - drug_interacts_drug_df = cudf.DataFrame({"src": [0, 1], "dst": [1, 2]}) - drug_interacts_gene = cudf.DataFrame({"src": [0, 1], "dst": [0, 1]}) - drug_treats_disease = cudf.DataFrame({"src": [1], "dst": [0]}) - data_dict = { - ("drug", "interacts", "drug"): drug_interacts_drug_df, - ("drug", "interacts", "gene"): drug_interacts_gene, - ("drug", "treats", "disease"): drug_treats_disease, - } - gs = CuGraphStorage(data_dict=data_dict, num_nodes_dict=num_nodes_dict) - # add node data - gs.add_node_data( - ntype="drug", - feat_name="node_feat", - feat_obj=th.as_tensor([0.1, 0.2, 0.3], dtype=th.float64), - ) - # add edge data - gs.add_edge_data( - canonical_etype=("drug", "interacts", "drug"), - feat_name="edge_feat", - feat_obj=th.as_tensor([0.2, 0.4], dtype=th.float64), - ) - - assert gs.num_nodes() == 6 - - assert gs.num_edges(("drug", "interacts", "drug")) == 2 - assert gs.num_edges(("drug", "interacts", "gene")) == 2 - assert gs.num_edges(("drug", "treats", "disease")) == 1 - - node_feat = ( - gs.get_node_storage(key="node_feat", ntype="drug") - .fetch([0, 1, 2]) - .to("cpu") - .numpy() - ) - 
np.testing.assert_equal(node_feat, np.asarray([0.1, 0.2, 0.3])) - - edge_feat = ( - gs.get_edge_storage(key="edge_feat", etype=("drug", "interacts", "drug")) - .fetch([0, 1]) - .to("cpu") - .numpy() - ) - np.testing.assert_equal(edge_feat, np.asarray([0.2, 0.4])) - - -def test_sampling_heterograph(dgl_graph): - cugraph_gs = cugraph_dgl.cugraph_storage_from_heterograph(dgl_graph) - - for fanout in [1, 2, 3, -1]: - for ntype in ["nt.a", "nt.b", "nt.c"]: - for d in ["in", "out"]: - assert_same_sampling_len( - dgl_graph, - cugraph_gs, - nodes={ntype: [0]}, - fanout=fanout, - edge_dir=d, - ) - - -def test_sampling_homogenous(): - src_ar = np.asarray([0, 1, 2, 0, 1, 2, 7, 9, 10, 11], dtype=np.int32) - dst_ar = np.asarray([3, 4, 5, 6, 7, 8, 6, 6, 6, 6], dtype=np.int32) - g = dgl.heterograph({("a", "connects", "a"): (src_ar, dst_ar)}) - cugraph_gs = cugraph_dgl.cugraph_storage_from_heterograph(g) - # Convert to homogeneous - g = dgl.to_homogeneous(g) - nodes = [6] - # Test for multiple fanouts - for fanout in [1, 2, 3]: - exp_g = g.sample_neighbors(nodes, fanout=fanout) - cu_g = cugraph_gs.sample_neighbors(nodes, fanout=fanout) - exp_src, exp_dst = exp_g.edges() - cu_src, cu_dst = cu_g.edges() - assert len(exp_src) == len(cu_src) - - # Test same results for all neighbours - exp_g = g.sample_neighbors(nodes, fanout=-1) - cu_g = cugraph_gs.sample_neighbors(nodes, fanout=-1) - exp_src, exp_dst = exp_g.edges() - exp_src, exp_dst = exp_src.numpy(), exp_dst.numpy() - - cu_src, cu_dst = cu_g.edges() - cu_src, cu_dst = cu_src.to("cpu").numpy(), cu_dst.to("cpu").numpy() - - # Assert same values sorted by src - exp_src_perm = exp_src.argsort() - exp_src = exp_src[exp_src_perm] - exp_dst = exp_dst[exp_src_perm] - - cu_src_perm = cu_src.argsort() - cu_src = cu_src[cu_src_perm] - cu_dst = cu_dst[cu_src_perm] - - np.testing.assert_equal(exp_dst, cu_dst) - np.testing.assert_equal(exp_src, cu_src) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_from_dgl_heterograph.py 
b/python/cugraph-dgl/cugraph_dgl/tests/test_from_dgl_heterograph.py deleted file mode 100644 index 667a4a2e66d..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_from_dgl_heterograph.py +++ /dev/null @@ -1,201 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import pytest - -try: - import cugraph_dgl -except ModuleNotFoundError: - pytest.skip("cugraph_dgl not available", allow_module_level=True) - -from cugraph.utilities.utils import import_optional -from .utils import ( - assert_same_edge_feats, - assert_same_edge_feats_daskapi, - assert_same_node_feats, - assert_same_node_feats_daskapi, - assert_same_num_edges_can_etypes, - assert_same_num_edges_etypes, - assert_same_num_nodes, -) - -th = import_optional("torch") -dgl = import_optional("dgl") -F = import_optional("dgl.backend") - - -def create_heterograph1(idtype): - ctx = th.device("cuda") - graph_data = { - ("nt.a", "join.1", "nt.a"): ( - F.tensor([0, 1, 2], dtype=idtype), - F.tensor([0, 1, 2], dtype=idtype), - ), - ("nt.a", "join.2", "nt.a"): ( - F.tensor([0, 1, 2], dtype=idtype), - F.tensor([0, 1, 2], dtype=idtype), - ), - } - g = dgl.heterograph(graph_data, device=th.device("cuda")) - g.nodes["nt.a"].data["h"] = F.copy_to(F.tensor([1, 1, 1], dtype=idtype), ctx=ctx) - return g - - -def create_heterograph2(idtype): - ctx = th.device("cuda") - - g = dgl.heterograph( - { - ("user", "plays", "game"): ( - F.tensor([0, 1, 1, 2], dtype=idtype), - 
F.tensor([0, 0, 1, 1], dtype=idtype), - ), - ("developer", "develops", "game"): ( - F.tensor([0, 1], dtype=idtype), - F.tensor([0, 1], dtype=idtype), - ), - ("developer", "tests", "game"): ( - F.tensor([0, 1], dtype=idtype), - F.tensor([0, 1], dtype=idtype), - ), - }, - idtype=idtype, - device=th.device("cuda"), - ) - - g.nodes["user"].data["h"] = F.copy_to(F.tensor([1, 1, 1], dtype=idtype), ctx=ctx) - g.nodes["user"].data["p"] = F.copy_to(F.tensor([1, 1, 1], dtype=idtype), ctx=ctx) - g.nodes["game"].data["h"] = F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=ctx) - g.nodes["developer"].data["h"] = F.copy_to(F.tensor([3, 3], dtype=idtype), ctx=ctx) - g.edges["plays"].data["h"] = F.copy_to( - F.tensor([1, 1, 1, 1], dtype=idtype), ctx=ctx - ) - return g - - -def create_heterograph3(idtype): - ctx = th.device("cuda") - - g = dgl.heterograph( - { - ("user", "follows", "user"): ( - F.tensor([0, 1, 1, 2, 2, 2], dtype=idtype), - F.tensor([0, 0, 1, 1, 2, 2], dtype=idtype), - ), - ("user", "plays", "game"): ( - F.tensor([0, 1], dtype=idtype), - F.tensor([0, 1], dtype=idtype), - ), - }, - idtype=idtype, - device=th.device("cuda"), - ) - g.nodes["user"].data["h"] = F.copy_to(F.tensor([1, 1, 1], dtype=idtype), ctx=ctx) - g.nodes["game"].data["h"] = F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=ctx) - g.edges["follows"].data["h"] = F.copy_to( - F.tensor([10, 20, 30, 40, 50, 60], dtype=idtype), ctx=ctx - ) - g.edges["follows"].data["p"] = F.copy_to( - F.tensor([1, 2, 3, 4, 5, 6], dtype=idtype), ctx=ctx - ) - g.edges["plays"].data["h"] = F.copy_to(F.tensor([1, 2], dtype=idtype), ctx=ctx) - return g - - -def create_heterograph4(idtype): - ctx = th.device("cuda") - - g = dgl.heterograph( - { - ("user", "follows", "user"): ( - F.tensor([1, 2], dtype=idtype), - F.tensor([0, 1], dtype=idtype), - ), - ("user", "plays", "game"): ( - F.tensor([0, 1], dtype=idtype), - F.tensor([0, 1], dtype=idtype), - ), - }, - idtype=idtype, - device=th.device("cuda"), - ) - g.nodes["user"].data["h"] = 
F.copy_to(F.tensor([1, 1, 1], dtype=idtype), ctx=ctx) - g.nodes["game"].data["h"] = F.copy_to(F.tensor([2, 2], dtype=idtype), ctx=ctx) - g.edges["follows"].data["h"] = F.copy_to(F.tensor([1, 2], dtype=idtype), ctx=ctx) - g.edges["plays"].data["h"] = F.copy_to(F.tensor([1, 2], dtype=idtype), ctx=ctx) - return g - - -@pytest.mark.parametrize("idxtype", [th.int32, th.int64]) -def test_heterograph_conversion_nodes_daskapi(idxtype): - graph_fs = [ - create_heterograph1, - create_heterograph2, - create_heterograph3, - create_heterograph4, - ] - for graph_f in graph_fs: - g = graph_f(idxtype) - gs = cugraph_dgl.cugraph_storage_from_heterograph(g) - - assert_same_num_nodes(gs, g) - assert_same_node_feats_daskapi(gs, g) - - -@pytest.mark.parametrize("idxtype", [th.int32, th.int64]) -def test_heterograph_conversion_edges_daskapi(idxtype): - graph_fs = [ - create_heterograph1, - create_heterograph2, - create_heterograph3, - create_heterograph4, - ] - for graph_f in graph_fs: - g = graph_f(idxtype) - gs = cugraph_dgl.cugraph_storage_from_heterograph(g) - - assert_same_num_edges_can_etypes(gs, g) - assert_same_num_edges_etypes(gs, g) - assert_same_edge_feats_daskapi(gs, g) - - -@pytest.mark.parametrize("idxtype", [th.int32, th.int64]) -def test_heterograph_conversion_nodes(idxtype): - graph_fs = [ - create_heterograph1, - create_heterograph2, - create_heterograph3, - create_heterograph4, - ] - for graph_f in graph_fs: - g = graph_f(idxtype) - gs = cugraph_dgl.cugraph_dgl_graph_from_heterograph(g) - - assert_same_num_nodes(gs, g) - assert_same_node_feats(gs, g) - - -@pytest.mark.parametrize("idxtype", [th.int32, th.int64]) -def test_heterograph_conversion_edges(idxtype): - graph_fs = [ - create_heterograph1, - create_heterograph2, - create_heterograph3, - create_heterograph4, - ] - for graph_f in graph_fs: - g = graph_f(idxtype) - gs = cugraph_dgl.cugraph_dgl_graph_from_heterograph(g) - - assert_same_num_edges_can_etypes(gs, g) - assert_same_num_edges_etypes(gs, g) - 
assert_same_edge_feats(gs, g) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py deleted file mode 100644 index a60db97b8d6..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -import cugraph_dgl -import pylibcugraph -import cupy -import numpy as np - -from cugraph.datasets import karate -from cugraph.utilities.utils import import_optional, MissingModule - -torch = import_optional("torch") -dgl = import_optional("dgl") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") -@pytest.mark.parametrize("direction", ["out", "in"]) -def test_graph_make_homogeneous_graph(direction): - df = karate.get_edgelist() - df.src = df.src.astype("int64") - df.dst = df.dst.astype("int64") - wgt = np.random.random((len(df),)) - - graph = cugraph_dgl.Graph() - num_nodes = max(df.src.max(), df.dst.max()) + 1 - node_x = np.random.random((num_nodes,)) - - graph.add_nodes( - num_nodes, data={"num": torch.arange(num_nodes, dtype=torch.int64), "x": node_x} - ) - graph.add_edges(df.src, df.dst, {"weight": wgt}) - plc_dgl_graph = graph._graph(direction=direction) - - assert graph.num_nodes() == num_nodes - assert graph.num_edges() == len(df) - assert 
graph.is_homogeneous - assert not graph.is_multi_gpu - - assert ( - graph.nodes() == torch.arange(num_nodes, dtype=torch.int64, device="cuda") - ).all() - - assert graph.nodes[None]["x"] is not None - assert (graph.nodes[None]["x"] == torch.as_tensor(node_x, device="cuda")).all() - assert ( - graph.nodes[None]["num"] - == torch.arange(num_nodes, dtype=torch.int64, device="cuda") - ).all() - - assert ( - graph.edges("eid", device="cuda") - == torch.arange(len(df), dtype=torch.int64, device="cuda") - ).all() - assert (graph.edges[None]["weight"] == torch.as_tensor(wgt, device="cuda")).all() - - plc_expected_graph = pylibcugraph.SGGraph( - pylibcugraph.ResourceHandle(), - pylibcugraph.GraphProperties(is_multigraph=True, is_symmetric=False), - df.src if direction == "out" else df.dst, - df.dst if direction == "out" else df.src, - vertices_array=cupy.arange(num_nodes, dtype="int64"), - ) - - # Do the expensive check to make sure this test fails if an invalid - # graph is constructed. - v_actual, d_in_actual, d_out_actual = pylibcugraph.degrees( - pylibcugraph.ResourceHandle(), - plc_dgl_graph, - source_vertices=cupy.arange(num_nodes, dtype="int64"), - do_expensive_check=True, - ) - - v_exp, d_in_exp, d_out_exp = pylibcugraph.degrees( - pylibcugraph.ResourceHandle(), - plc_expected_graph, - source_vertices=cupy.arange(num_nodes, dtype="int64"), - do_expensive_check=True, - ) - - assert (v_actual == v_exp).all() - assert (d_in_actual == d_in_exp).all() - assert (d_out_actual == d_out_exp).all() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") -@pytest.mark.parametrize("direction", ["out", "in"]) -def test_graph_make_heterogeneous_graph(direction): - df = karate.get_edgelist() - df.src = df.src.astype("int64") - df.dst = df.dst.astype("int64") - - graph = cugraph_dgl.Graph() - total_num_nodes = max(df.src.max(), df.dst.max()) + 1 - - num_nodes_group_1 = 
total_num_nodes // 2 - num_nodes_group_2 = total_num_nodes - num_nodes_group_1 - - node_x_1 = np.random.random((num_nodes_group_1,)) - node_x_2 = np.random.random((num_nodes_group_2,)) - - graph.add_nodes(num_nodes_group_1, {"x": node_x_1}, "type1") - graph.add_nodes(num_nodes_group_2, {"x": node_x_2}, "type2") - - edges_11 = df[(df.src < num_nodes_group_1) & (df.dst < num_nodes_group_1)] - edges_12 = df[(df.src < num_nodes_group_1) & (df.dst >= num_nodes_group_1)] - edges_21 = df[(df.src >= num_nodes_group_1) & (df.dst < num_nodes_group_1)] - edges_22 = df[(df.src >= num_nodes_group_1) & (df.dst >= num_nodes_group_1)] - - edges_12.dst -= num_nodes_group_1 - edges_21.src -= num_nodes_group_1 - edges_22.dst -= num_nodes_group_1 - edges_22.src -= num_nodes_group_1 - - graph.add_edges(edges_11.src, edges_11.dst, etype=("type1", "e1", "type1")) - graph.add_edges(edges_12.src, edges_12.dst, etype=("type1", "e2", "type2")) - graph.add_edges(edges_21.src, edges_21.dst, etype=("type2", "e3", "type1")) - graph.add_edges(edges_22.src, edges_22.dst, etype=("type2", "e4", "type2")) - - assert not graph.is_homogeneous - assert not graph.is_multi_gpu - - # Verify graph.nodes() - assert ( - graph.nodes() == torch.arange(total_num_nodes, dtype=torch.int64, device="cuda") - ).all() - assert ( - graph.nodes("type1") - == torch.arange(num_nodes_group_1, dtype=torch.int64, device="cuda") - ).all() - assert ( - graph.nodes("type2") - == torch.arange(num_nodes_group_2, dtype=torch.int64, device="cuda") - ).all() - - # Verify graph.edges() - assert ( - graph.edges("eid", etype=("type1", "e1", "type1")) - == torch.arange(len(edges_11), dtype=torch.int64, device="cuda") - ).all() - assert ( - graph.edges("eid", etype=("type1", "e2", "type2")) - == torch.arange(len(edges_12), dtype=torch.int64, device="cuda") - ).all() - assert ( - graph.edges("eid", etype=("type2", "e3", "type1")) - == torch.arange(len(edges_21), dtype=torch.int64, device="cuda") - ).all() - assert ( - graph.edges("eid", 
etype=("type2", "e4", "type2")) - == torch.arange(len(edges_22), dtype=torch.int64, device="cuda") - ).all() - - # Use sampling call to check graph creation - # This isn't a test of cuGraph sampling with DGL; the options are - # set to verify the graph only. - plc_graph = graph._graph(direction) - sampling_output = pylibcugraph.uniform_neighbor_sample( - pylibcugraph.ResourceHandle(), - plc_graph, - start_list=cupy.arange(total_num_nodes, dtype="int64"), - h_fan_out=np.array([1, 1], dtype="int32"), - with_replacement=False, - do_expensive_check=True, - with_edge_properties=True, - prior_sources_behavior="exclude", - return_dict=True, - ) - - expected_etypes = { - 0: "e1", - 1: "e2", - 2: "e3", - 3: "e4", - } - expected_offsets = { - 0: (0, 0), - 1: (0, num_nodes_group_1), - 2: (num_nodes_group_1, 0), - 3: (num_nodes_group_1, num_nodes_group_1), - } - if direction == "in": - src_col = "minors" - dst_col = "majors" - else: - src_col = "majors" - dst_col = "minors" - - # Looping over the output verifies that all edges are valid - # (and therefore, the graph is valid) - for i, etype in enumerate(sampling_output["edge_type"].tolist()): - eid = int(sampling_output["edge_id"][i]) - - srcs, dsts, eids = graph.edges( - "all", etype=expected_etypes[etype], device="cpu" - ) - - assert eids[eid] == eid - assert ( - srcs[eid] == int(sampling_output[src_col][i]) - expected_offsets[etype][0] - ) - assert ( - dsts[eid] == int(sampling_output[dst_col][i]) - expected_offsets[etype][1] - ) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py deleted file mode 100644 index eedda664c52..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py +++ /dev/null @@ -1,310 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import pytest - -import cugraph_dgl -import pylibcugraph -import cupy -import numpy as np - -import cudf - -from cugraph.datasets import karate -from cugraph.utilities.utils import import_optional, MissingModule - -from cugraph.gnn import ( - cugraph_comms_shutdown, - cugraph_comms_create_unique_id, - cugraph_comms_get_raft_handle, -) - -from .utils import init_pytorch_worker - -pylibwholegraph = import_optional("pylibwholegraph") -torch = import_optional("torch") -dgl = import_optional("dgl") - - -def run_test_graph_make_homogeneous_graph_mg(rank, uid, world_size, direction): - init_pytorch_worker(rank, world_size, uid, init_wholegraph=True) - - df = karate.get_edgelist() - df.src = df.src.astype("int64") - df.dst = df.dst.astype("int64") - wgt = np.random.random((len(df),)) - - graph = cugraph_dgl.Graph( - is_multi_gpu=True, ndata_storage="wholegraph", edata_storage="wholegraph" - ) - - # The number of nodes is set globally but features can have - # any distribution across workers as long as they are in order. 
- global_num_nodes = max(df.src.max(), df.dst.max()) + 1 - node_x = np.array_split(np.arange(global_num_nodes, dtype="int64"), world_size)[ - rank - ] - - # Each worker gets a shuffled, permuted version of the edgelist - df = df.sample(frac=1.0) - df.src = (df.src + rank) % global_num_nodes - df.dst = (df.dst + rank + 1) % global_num_nodes - - graph.add_nodes(global_num_nodes, data={"x": node_x}) - graph.add_edges(df.src, df.dst, {"weight": wgt}) - plc_dgl_graph = graph._graph(direction=direction) - - assert graph.num_nodes() == global_num_nodes - assert graph.num_edges() == len(df) * world_size - assert graph.is_homogeneous - assert graph.is_multi_gpu - - assert ( - graph.nodes() - == torch.arange(global_num_nodes, dtype=torch.int64, device="cuda") - ).all() - ix = torch.arange(len(node_x) * rank, len(node_x) * (rank + 1), dtype=torch.int64) - assert graph.nodes[ix]["x"] is not None - assert (graph.nodes[ix]["x"] == torch.as_tensor(node_x, device="cuda")).all() - - assert ( - graph.edges("eid", device="cuda") - == torch.arange(world_size * len(df), dtype=torch.int64, device="cuda") - ).all() - ix = torch.arange(len(df) * rank, len(df) * (rank + 1), dtype=torch.int64) - assert (graph.edges[ix]["weight"] == torch.as_tensor(wgt, device="cuda")).all() - - plc_handle = pylibcugraph.ResourceHandle( - cugraph_comms_get_raft_handle().getHandle() - ) - - plc_expected_graph = pylibcugraph.MGGraph( - plc_handle, - pylibcugraph.GraphProperties(is_multigraph=True, is_symmetric=False), - [df.src] if direction == "out" else [df.dst], - [df.dst] if direction == "out" else [df.src], - vertices_array=[ - cupy.array_split(cupy.arange(global_num_nodes, dtype="int64"), world_size)[ - rank - ] - ], - ) - - # Do the expensive check to make sure this test fails if an invalid - # graph is constructed. 
- v_actual, d_in_actual, d_out_actual = pylibcugraph.degrees( - plc_handle, - plc_dgl_graph, - source_vertices=cupy.arange(global_num_nodes, dtype="int64"), - do_expensive_check=True, - ) - - v_exp, d_in_exp, d_out_exp = pylibcugraph.degrees( - plc_handle, - plc_expected_graph, - source_vertices=cupy.arange(global_num_nodes, dtype="int64"), - do_expensive_check=True, - ) - - assert (v_actual == v_exp).all() - assert (d_in_actual == d_in_exp).all() - assert (d_out_actual == d_out_exp).all() - - cugraph_comms_shutdown() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif( - isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available" -) -@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") -@pytest.mark.parametrize("direction", ["out", "in"]) -def test_graph_make_homogeneous_graph_mg(direction): - uid = cugraph_comms_create_unique_id() - world_size = torch.cuda.device_count() - - torch.multiprocessing.spawn( - run_test_graph_make_homogeneous_graph_mg, - args=( - uid, - world_size, - direction, - ), - nprocs=world_size, - ) - - -def run_test_graph_make_heterogeneous_graph_mg(rank, uid, world_size, direction): - init_pytorch_worker(rank, world_size, uid) - - df = karate.get_edgelist() - df.src = df.src.astype("int64") - df.dst = df.dst.astype("int64") - - graph = cugraph_dgl.Graph(is_multi_gpu=True) - total_num_nodes = max(df.src.max(), df.dst.max()) + 1 - - # Each worker gets a shuffled, permuted version of the edgelist - df = df.sample(frac=1.0) - df.src = (df.src + rank) % total_num_nodes - df.dst = (df.dst + rank + 1) % total_num_nodes - - num_nodes_group_1 = total_num_nodes // 2 - num_nodes_group_2 = total_num_nodes - num_nodes_group_1 - - node_x_1 = np.array_split(np.random.random((num_nodes_group_1,)), world_size)[rank] - node_x_2 = np.array_split(np.random.random((num_nodes_group_2,)), world_size)[rank] - - graph.add_nodes(num_nodes_group_1, {"x": node_x_1}, 
"type1") - graph.add_nodes(num_nodes_group_2, {"x": node_x_2}, "type2") - - edges_11 = df[(df.src < num_nodes_group_1) & (df.dst < num_nodes_group_1)] - edges_12 = df[(df.src < num_nodes_group_1) & (df.dst >= num_nodes_group_1)] - edges_21 = df[(df.src >= num_nodes_group_1) & (df.dst < num_nodes_group_1)] - edges_22 = df[(df.src >= num_nodes_group_1) & (df.dst >= num_nodes_group_1)] - - edges_12.dst -= num_nodes_group_1 - edges_21.src -= num_nodes_group_1 - edges_22.dst -= num_nodes_group_1 - edges_22.src -= num_nodes_group_1 - - total_edges_11 = torch.tensor(len(edges_11), device="cuda", dtype=torch.int64) - torch.distributed.all_reduce(total_edges_11, torch.distributed.ReduceOp.SUM) - total_edges_12 = torch.tensor(len(edges_12), device="cuda", dtype=torch.int64) - torch.distributed.all_reduce(total_edges_12, torch.distributed.ReduceOp.SUM) - total_edges_21 = torch.tensor(len(edges_21), device="cuda", dtype=torch.int64) - torch.distributed.all_reduce(total_edges_21, torch.distributed.ReduceOp.SUM) - total_edges_22 = torch.tensor(len(edges_22), device="cuda", dtype=torch.int64) - torch.distributed.all_reduce(total_edges_22, torch.distributed.ReduceOp.SUM) - - graph.add_edges(edges_11.src, edges_11.dst, etype=("type1", "e1", "type1")) - graph.add_edges(edges_12.src, edges_12.dst, etype=("type1", "e2", "type2")) - graph.add_edges(edges_21.src, edges_21.dst, etype=("type2", "e3", "type1")) - graph.add_edges(edges_22.src, edges_22.dst, etype=("type2", "e4", "type2")) - - assert not graph.is_homogeneous - assert graph.is_multi_gpu - - # Verify graph.nodes() - assert ( - graph.nodes() == torch.arange(total_num_nodes, dtype=torch.int64, device="cuda") - ).all() - assert ( - graph.nodes("type1") - == torch.arange(num_nodes_group_1, dtype=torch.int64, device="cuda") - ).all() - assert ( - graph.nodes("type2") - == torch.arange(num_nodes_group_2, dtype=torch.int64, device="cuda") - ).all() - - # Verify graph.edges() - assert ( - graph.edges("eid", etype=("type1", "e1", 
"type1")) - == torch.arange(total_edges_11, dtype=torch.int64, device="cuda") - ).all() - assert ( - graph.edges("eid", etype=("type1", "e2", "type2")) - == torch.arange(total_edges_12, dtype=torch.int64, device="cuda") - ).all() - assert ( - graph.edges("eid", etype=("type2", "e3", "type1")) - == torch.arange(total_edges_21, dtype=torch.int64, device="cuda") - ).all() - assert ( - graph.edges("eid", etype=("type2", "e4", "type2")) - == torch.arange(total_edges_22, dtype=torch.int64, device="cuda") - ).all() - - # Use sampling call to check graph creation - # This isn't a test of cuGraph sampling with DGL; the options are - # set to verify the graph only. - plc_graph = graph._graph(direction) - assert isinstance(plc_graph, pylibcugraph.MGGraph) - sampling_output = pylibcugraph.uniform_neighbor_sample( - graph._resource_handle, - plc_graph, - start_list=cupy.arange(total_num_nodes, dtype="int64"), - batch_id_list=cupy.full(total_num_nodes, rank, dtype="int32"), - label_list=cupy.arange(world_size, dtype="int32"), - label_to_output_comm_rank=cupy.arange(world_size, dtype="int32"), - h_fan_out=np.array([-1], dtype="int32"), - with_replacement=False, - do_expensive_check=True, - with_edge_properties=True, - prior_sources_behavior="exclude", - return_dict=True, - ) - - sdf = cudf.DataFrame( - { - "majors": sampling_output["majors"], - "minors": sampling_output["minors"], - "edge_id": sampling_output["edge_id"], - "edge_type": sampling_output["edge_type"], - } - ) - - expected_offsets = { - 0: (0, 0), - 1: (0, num_nodes_group_1), - 2: (num_nodes_group_1, 0), - 3: (num_nodes_group_1, num_nodes_group_1), - } - if direction == "in": - src_col = "minors" - dst_col = "majors" - else: - src_col = "majors" - dst_col = "minors" - - edges_11["etype"] = 0 - edges_12["etype"] = 1 - edges_21["etype"] = 2 - edges_22["etype"] = 3 - - cdf = cudf.concat([edges_11, edges_12, edges_21, edges_22]) - for i in range(len(cdf)): - row = cdf.iloc[i] - etype = row["etype"] - src = row["src"] + 
expected_offsets[etype][0] - dst = row["dst"] + expected_offsets[etype][1] - - f = sdf[ - (sdf[src_col] == src) & (sdf[dst_col] == dst) & (sdf["edge_type"] == etype) - ] - assert len(f) > 0 # may be multiple, some could be on other GPU - - cugraph_comms_shutdown() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif( - isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available" -) -@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") -@pytest.mark.parametrize("direction", ["out", "in"]) -def test_graph_make_heterogeneous_graph_mg(direction): - uid = cugraph_comms_create_unique_id() - world_size = torch.cuda.device_count() - - torch.multiprocessing.spawn( - run_test_graph_make_heterogeneous_graph_mg, - args=( - uid, - world_size, - direction, - ), - nprocs=world_size, - ) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_utils.py b/python/cugraph-dgl/cugraph_dgl/tests/test_utils.py deleted file mode 100644 index 4be66758b43..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/test_utils.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import cudf -import cupy as cp -import numpy as np -from cugraph_dgl.dataloading.utils.sampling_helpers import ( - cast_to_tensor, - _get_renumber_map, - _split_tensor, - _get_tensor_d_from_sampled_df, - create_homogeneous_sampled_graphs_from_dataframe, - _get_source_destination_range, - _create_homogeneous_cugraph_dgl_nn_sparse_graph, - create_homogeneous_sampled_graphs_from_dataframe_csc, -) -from cugraph.utilities.utils import import_optional - -dgl = import_optional("dgl") -torch = import_optional("torch") -cugraph_dgl = import_optional("cugraph_dgl") - - -def test_casting_empty_array(): - ar = cp.zeros(shape=0, dtype=cp.int32) - ser = cudf.Series(ar) - output_tensor = cast_to_tensor(ser) - assert output_tensor.dtype == torch.int32 - - -def get_dummy_sampled_df(): - df = cudf.DataFrame() - df["sources"] = [0, 0, 1, 0, 0, 1, 0, 0, 2] + [np.nan] * 4 - df["destinations"] = [1, 2, 0, 1, 2, 1, 2, 0, 1] + [np.nan] * 4 - df["batch_id"] = [0, 0, 0, 1, 1, 1, 2, 2, 2] + [np.nan] * 4 - df["hop_id"] = [0, 1, 1, 0, 1, 1, 0, 1, 1] + [np.nan] * 4 - df["map"] = [4, 7, 10, 13, 10, 11, 12, 13, 14, 15, 16, 17, 18] - df = df.astype("int32") - df["hop_id"] = df["hop_id"].astype("uint8") - df["map"] = df["map"].astype("int64") - return df - - -def get_dummy_sampled_df_csc(): - df_dict = dict( - minors=np.array( - [1, 1, 2, 1, 0, 3, 1, 3, 2, 3, 2, 4, 0, 1, 1, 0, 3, 2], dtype=np.int32 - ), - major_offsets=np.arange(19, dtype=np.int64), - map=np.array( - [26, 29, 33, 22, 23, 32, 18, 29, 33, 33, 8, 30, 32], dtype=np.int32 - ), - renumber_map_offsets=np.array([0, 4, 9, 13], dtype=np.int64), - label_hop_offsets=np.array([0, 1, 3, 6, 7, 9, 13, 14, 16, 18], dtype=np.int64), - ) - - # convert values to Series so that NaNs are padded automatically - return cudf.DataFrame({k: cudf.Series(v) for k, v in df_dict.items()}) - - -def test_get_renumber_map(): - - sampled_df = get_dummy_sampled_df() - - df, renumber_map, renumber_map_batch_indices = _get_renumber_map(sampled_df) - - # Ensure that 
map was dropped - assert "map" not in df.columns - - expected_map = torch.as_tensor( - [10, 11, 12, 13, 14, 15, 16, 17, 18], dtype=torch.int32, device="cuda" - ) - assert torch.equal(renumber_map, expected_map) - - expected_batch_indices = torch.as_tensor([3, 6], dtype=torch.int32, device="cuda") - assert torch.equal(renumber_map_batch_indices, expected_batch_indices) - - # Ensure we dropped the Nans for rows corresponding to the renumber_map - assert len(df) == 9 - - t_ls = _split_tensor(renumber_map, renumber_map_batch_indices) - assert torch.equal( - t_ls[0], torch.as_tensor([10, 11, 12], dtype=torch.int64, device="cuda") - ) - assert torch.equal( - t_ls[1], torch.as_tensor([13, 14, 15], dtype=torch.int64, device="cuda") - ) - assert torch.equal( - t_ls[2], torch.as_tensor([16, 17, 18], dtype=torch.int64, device="cuda") - ) - - -def test_get_tensor_d_from_sampled_df(): - df = get_dummy_sampled_df() - tensor_d = _get_tensor_d_from_sampled_df(df) - - expected_maps = {} - expected_maps[0] = torch.as_tensor([10, 11, 12], dtype=torch.int64, device="cuda") - expected_maps[1] = torch.as_tensor([13, 14, 15], dtype=torch.int64, device="cuda") - expected_maps[2] = torch.as_tensor([16, 17, 18], dtype=torch.int64, device="cuda") - - for batch_id, batch_td in tensor_d.items(): - batch_df = df[df["batch_id"] == batch_id] - for hop_id, hop_t in batch_td.items(): - if hop_id != "map": - hop_df = batch_df[batch_df["hop_id"] == hop_id] - assert torch.equal(hop_t["sources"], cast_to_tensor(hop_df["sources"])) - assert torch.equal( - hop_t["destinations"], cast_to_tensor(hop_df["destinations"]) - ) - - assert torch.equal(batch_td["map"], expected_maps[batch_id]) - - -def test_create_homogeneous_sampled_graphs_from_dataframe(): - sampler = dgl.dataloading.MultiLayerNeighborSampler([2, 2]) - g = dgl.graph(([0, 10, 20], [0, 0, 10])).to("cuda") - dgl_input_nodes, dgl_output_nodes, dgl_blocks = sampler.sample_blocks( - g, torch.as_tensor([0]).to("cuda") - ) - - # Directions are reversed 
in dgl - s1, d1 = dgl_blocks[0].edges() - s0, d0 = dgl_blocks[1].edges() - srcs = cp.concatenate([cp.asarray(s0), cp.asarray(s1)]) - dsts = cp.concatenate([cp.asarray(d0), cp.asarray(d1)]) - - nids = dgl_blocks[0].srcdata[dgl.NID] - nids = cp.concatenate( - [cp.asarray([2]), cp.asarray([len(nids) + 2]), cp.asarray(nids)] - ) - - df = cudf.DataFrame() - df["sources"] = srcs - df["destinations"] = dsts - df["hop_id"] = [0] * len(s0) + [1] * len(s1) - df["batch_id"] = 0 - df["map"] = nids - - ( - cugraph_input_nodes, - cugraph_output_nodes, - cugraph_blocks, - ) = create_homogeneous_sampled_graphs_from_dataframe(df)[0] - - assert torch.equal(dgl_input_nodes, cugraph_input_nodes) - assert torch.equal(dgl_output_nodes, cugraph_output_nodes) - - for c_block, d_block in zip(cugraph_blocks, dgl_blocks): - ce, cd = c_block.edges() - de, dd = d_block.edges() - assert torch.equal(ce, de) - assert torch.equal(cd, dd) - - -def test_get_source_destination_range(): - df = get_dummy_sampled_df() - output_d = _get_source_destination_range(df) - - expected_output = { - (0, 0): {"sources_range": 0, "destinations_range": 1}, - (0, 1): {"sources_range": 1, "destinations_range": 2}, - (1, 0): {"sources_range": 0, "destinations_range": 1}, - (1, 1): {"sources_range": 1, "destinations_range": 2}, - (2, 0): {"sources_range": 0, "destinations_range": 2}, - (2, 1): {"sources_range": 2, "destinations_range": 1}, - } - - assert output_d == expected_output - - -def test__create_homogeneous_cugraph_dgl_nn_sparse_graph(): - tensor_d = { - "sources_range": 1, - "destinations_range": 2, - "sources": torch.as_tensor([0, 0, 1, 1], dtype=torch.int64, device="cuda"), - "destinations": torch.as_tensor([0, 0, 1, 2], dtype=torch.int64, device="cuda"), - } - - seednodes_range = 10 - sparse_graph = _create_homogeneous_cugraph_dgl_nn_sparse_graph( - tensor_d, seednodes_range - ) - assert sparse_graph.num_src_nodes() == 2 - assert sparse_graph.num_dst_nodes() == seednodes_range + 1 - assert 
isinstance(sparse_graph, cugraph_dgl.nn.SparseGraph) - - -def test_create_homogeneous_sampled_graphs_from_dataframe_csc(): - df = get_dummy_sampled_df_csc() - batches = create_homogeneous_sampled_graphs_from_dataframe_csc(df) - - assert len(batches) == 3 - assert torch.equal(batches[0][0], torch.IntTensor([26, 29, 33, 22]).cuda()) - assert torch.equal(batches[1][0], torch.IntTensor([23, 32, 18, 29, 33]).cuda()) - assert torch.equal(batches[2][0], torch.IntTensor([33, 8, 30, 32]).cuda()) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/utils.py b/python/cugraph-dgl/cugraph_dgl/tests/utils.py deleted file mode 100644 index fa4eb05f297..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/tests/utils.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os - -from cugraph.utilities.utils import import_optional -from cugraph.gnn import cugraph_comms_init - -th = import_optional("torch") - - -def assert_same_node_feats_daskapi(gs, g): - assert set(gs.ndata.keys()) == set(g.ndata.keys()) - - for key in g.ndata.keys(): - for ntype in g.ntypes: - indices = th.arange(0, g.num_nodes(ntype), dtype=g.idtype).cuda() - if len(g.ntypes) <= 1 or ntype in g.ndata[key]: - g_output = g.get_node_storage(key=key, ntype=ntype).fetch( - indices, device="cuda" - ) - gs_output = gs.get_node_storage(key=key, ntype=ntype).fetch(indices) - equal_t = (gs_output != g_output).sum().cpu() - assert equal_t == 0 - - -def assert_same_node_feats(gs, g): - assert set(gs.ndata.keys()) == set(g.ndata.keys()) - assert set(gs.ntypes) == set(g.ntypes) - - for key in g.ndata.keys(): - for ntype in g.ntypes: - if len(g.ntypes) <= 1 or ntype in g.ndata[key]: - indices = th.arange(0, g.num_nodes(ntype), dtype=g.idtype) - - g_output = g.ndata[key] - gs_output = gs.ndata[key] - - if len(g.ntypes) > 1: - g_output = g_output[ntype] - gs_output = gs_output[ntype] - - g_output = g_output[indices] - gs_output = gs_output[indices] - - equal_t = (gs_output != g_output).sum() - assert equal_t == 0 - - -def assert_same_num_nodes(gs, g): - for ntype in g.ntypes: - assert g.num_nodes(ntype) == gs.num_nodes(ntype) - - -def assert_same_num_edges_can_etypes(gs, g): - for can_etype in g.canonical_etypes: - assert g.num_edges(can_etype) == gs.num_edges(can_etype) - - -def assert_same_num_edges_etypes(gs, g): - for etype in g.etypes: - assert g.num_edges(etype) == gs.num_edges(etype) - - -def assert_same_edge_feats_daskapi(gs, g): - assert set(gs.edata.keys()) == set(g.edata.keys()) - for key in g.edata.keys(): - for etype in g.canonical_etypes: - indices = th.arange(0, g.num_edges(etype), dtype=g.idtype).cuda() - if len(g.etypes) <= 1 or etype in g.edata[key]: - g_output = g.get_edge_storage(key=key, etype=etype).fetch( - indices, device="cuda" - ) - gs_output = 
gs.get_edge_storage(key=key, etype=etype).fetch(indices) - equal_t = (gs_output != g_output).sum().cpu() - assert equal_t == 0 - - -def assert_same_edge_feats(gs, g): - assert set(gs.edata.keys()) == set(g.edata.keys()) - assert set(gs.canonical_etypes) == set(g.canonical_etypes) - assert set(gs.etypes) == set(g.etypes) - - for key in g.edata.keys(): - for etype in g.canonical_etypes: - if len(g.etypes) <= 1 or etype in g.edata[key]: - indices = th.arange(0, g.num_edges(etype), dtype=g.idtype).cuda() - g_output = g.edata[key] - gs_output = gs.edata[key] - - if len(g.etypes) > 1: - g_output = g_output[etype] - gs_output = gs_output[etype] - - g_output = g_output[indices] - gs_output = gs_output[indices] - - equal_t = (gs_output != g_output).sum().cpu() - assert equal_t == 0 - - -def assert_same_sampling_len(dgl_g, cugraph_gs, nodes, fanout, edge_dir): - dgl_o = dgl_g.sample_neighbors(nodes, fanout=fanout, edge_dir=edge_dir) - cugraph_o = cugraph_gs.sample_neighbors(nodes, fanout=fanout, edge_dir=edge_dir) - assert cugraph_o.num_edges() == dgl_o.num_edges() - for etype in dgl_o.canonical_etypes: - assert dgl_o.num_edges(etype) == cugraph_o.num_edges(etype) - - -def init_pytorch_worker(rank, world_size, cugraph_id, init_wholegraph=False): - import rmm - - rmm.reinitialize( - devices=rank, - ) - - import cupy - - cupy.cuda.Device(rank).use() - from rmm.allocators.cupy import rmm_cupy_allocator - - cupy.cuda.set_allocator(rmm_cupy_allocator) - - from cugraph.testing.mg_utils import enable_spilling - - enable_spilling() - - th.cuda.set_device(rank) - - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "12355" - th.distributed.init_process_group("nccl", rank=rank, world_size=world_size) - - if init_wholegraph: - import pylibwholegraph - - pylibwholegraph.torch.initialize.init( - rank, - world_size, - rank, - world_size, - ) - - cugraph_comms_init(rank=rank, world_size=world_size, uid=cugraph_id, device=rank) diff --git 
a/python/cugraph-dgl/cugraph_dgl/typing.py b/python/cugraph-dgl/cugraph_dgl/typing.py deleted file mode 100644 index a68463c3fd9..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/typing.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import List, Union, Tuple -from cugraph.utilities.utils import import_optional - -from cugraph_dgl.nn import SparseGraph - -import pandas -import numpy -import cupy -import cudf - -torch = import_optional("torch") -dgl = import_optional("dgl") - -TensorType = Union[ - "torch.Tensor", - "cupy.ndarray", - "numpy.ndarray", - "cudf.Series", - "pandas.Series", - List[int], -] - -DGLSamplerOutput = Tuple[ - "torch.Tensor", - "torch.Tensor", - List[Union["dgl.Block", SparseGraph]], -] diff --git a/python/cugraph-dgl/cugraph_dgl/utils/__init__.py b/python/cugraph-dgl/cugraph_dgl/utils/__init__.py deleted file mode 100644 index 081b2ae8260..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/utils/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py b/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py deleted file mode 100644 index 2ba04bd916f..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# Utils to convert b/w dgl heterograph to cugraph GraphStore -from __future__ import annotations -from typing import Dict, Tuple, Union - -from cugraph_dgl.typing import TensorType - -import cudf -import pandas as pd -import dask.dataframe as dd -import dask_cudf -from dask.distributed import get_client -import cupy as cp -import numpy as np -from cugraph.utilities.utils import import_optional -from cugraph.gnn.dgl_extensions.dgl_uniform_sampler import src_n, dst_n - -dgl = import_optional("dgl") -F = import_optional("dgl.backend") -torch = import_optional("torch") - - -# Feature Tensor to DataFrame Utils -def convert_to_column_major(t: torch.Tensor): - return t.t().contiguous().t() - - -def create_ar_from_tensor(t: torch.Tensor): - t = convert_to_column_major(t) - if t.device.type == "cuda": - ar = cp.asarray(t) - else: - ar = t.numpy() - return ar - - -def _create_edge_frame(src_t: torch.Tensor, dst_t: torch.Tensor, single_gpu: bool): - """ - Create a edge dataframe from src_t and dst_t - """ - src_ar = create_ar_from_tensor(src_t) - dst_ar = create_ar_from_tensor(dst_t) - edge_df = _create_df_from_edge_ar(src_ar, dst_ar, single_gpu=single_gpu) - edge_df = edge_df.rename( - columns={edge_df.columns[0]: src_n, edge_df.columns[1]: dst_n} - ) - return edge_df - - -def _create_df_from_edge_ar(src_ar, dst_ar, single_gpu=True): - if not single_gpu: - nworkers = len(get_client().scheduler_info()["workers"]) - npartitions = nworkers * 1 - if single_gpu: - df = cudf.DataFrame(data={src_n: src_ar, dst_n: dst_ar}) - else: - if isinstance(src_ar, cp.ndarray): - src_ar = src_ar.get() - if isinstance(dst_ar, cp.ndarray): - dst_ar = dst_ar.get() - - df = pd.DataFrame(data={src_n: src_ar, dst_n: dst_ar}) - # Only save stuff in host memory - df = dd.from_pandas(df, npartitions=npartitions).persist() - df = df.map_partitions(cudf.DataFrame.from_pandas) - - df = df.reset_index(drop=True) - return df - - -def get_edges_dict_from_dgl_HeteroGraph( - graph: dgl.DGLHeteroGraph, 
single_gpu: bool -) -> Dict[Tuple[str, str, str], Union[cudf.DataFrame, dask_cudf.DataFrame]]: - etype_d = {} - for can_etype in graph.canonical_etypes: - src_t, dst_t = graph.edges(form="uv", etype=can_etype) - etype_d[can_etype] = _create_edge_frame(src_t, dst_t, single_gpu) - return etype_d - - -def add_ndata_from_dgl_HeteroGraph(gs, g): - for feat_name, feat in g.ndata.items(): - if isinstance(feat, torch.Tensor): - assert len(g.ntypes) == 1 - ntype = g.ntypes[0] - gs.ndata_storage.add_data( - feat_name=feat_name, type_name=ntype, feat_obj=feat - ) - else: - for ntype, feat_t in feat.items(): - gs.ndata_storage.add_data( - feat_name=feat_name, type_name=ntype, feat_obj=feat_t - ) - - -def add_edata_from_dgl_HeteroGraph(gs, g): - for feat_name, feat in g.edata.items(): - if isinstance(feat, torch.Tensor): - assert len(g.etypes) == 1 - etype = g.etypes[0] - gs.edata_storage.add_data( - feat_name=feat_name, type_name=etype, feat_obj=feat - ) - else: - for etype, feat_t in feat.items(): - gs.edata_storage.add_data( - feat_name=feat_name, type_name=etype, feat_obj=feat_t - ) - - -def _cast_to_torch_tensor(t: TensorType) -> "torch.Tensor": - if isinstance(t, torch.Tensor): - return t - elif isinstance(t, (cp.ndarray, cudf.Series)): - return torch.as_tensor(t, device="cuda") - elif isinstance(t, (pd.Series, np.ndarray)): - return torch.as_tensor(t, device="cpu") - return torch.as_tensor(t) diff --git a/python/cugraph-dgl/cugraph_dgl/utils/cugraph_storage_utils.py b/python/cugraph-dgl/cugraph_dgl/utils/cugraph_storage_utils.py deleted file mode 100644 index cc23aa910a5..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/utils/cugraph_storage_utils.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -from cugraph.gnn.dgl_extensions.utils.sampling import eid_n, src_n, dst_n -from cugraph.utilities.utils import import_optional, MissingModule - -dgl = import_optional("dgl") -F = import_optional("dgl.backend") - - -def _assert_valid_canonical_etype(canonical_etype): - if not _is_valid_canonical_etype: - error_message = ( - f"Invalid canonical_etype {canonical_etype} " - + "canonical etype should be is a string triplet (str, str, str)" - + "for source node type, edge type and destination node type" - ) - raise dgl.DGLError(error_message) - - -def _is_valid_canonical_etype(canonical_etype): - if not isinstance(canonical_etype, tuple): - return False - - if len(canonical_etype) != 3: - return False - - for t in canonical_etype: - if not isinstance(t, str): - return False - return True - - -def add_edge_ids_to_edges_dict(edge_data_dict, edge_id_offset_d, id_dtype): - eids_data_dict = {} - for etype, df in edge_data_dict.items(): - # Do not modify input by user - if len(df.columns) != 2: - raise ValueError( - "Provided dataframe in edge_dict contains more than 2 columns", - "DataFrame with only 2 columns is supported", - "Where first is treated as src and second as dst", - ) - df = df.copy(deep=False) - df = df.rename(columns={df.columns[0]: src_n, df.columns[1]: dst_n}) - df[eid_n] = id_dtype(1) - df[eid_n] = df[eid_n].cumsum() - df[eid_n] = df[eid_n] + edge_id_offset_d[etype] - 1 - df[eid_n] = df[eid_n].astype(id_dtype) - eids_data_dict[etype] = df - return eids_data_dict - - -def add_node_offset_to_edges_dict(edge_data_dict, 
node_id_offset_d): - for etype, df in edge_data_dict.items(): - src_type, _, dst_type = etype - df[src_n] = df[src_n] + node_id_offset_d[src_type] - df[dst_n] = df[dst_n] + node_id_offset_d[dst_type] - return edge_data_dict - - -if isinstance(F, MissingModule): - backend_dtype_to_np_dtype_dict = MissingModule("dgl") -else: - backend_dtype_to_np_dtype_dict = { - F.bool: bool, - F.uint8: np.uint8, - F.int8: np.int8, - F.int16: np.int16, - F.int32: np.int32, - F.int64: np.int64, - F.float16: np.float16, - F.float32: np.float32, - F.float64: np.float64, - } diff --git a/python/cugraph-dgl/cugraph_dgl/utils/feature_storage.py b/python/cugraph-dgl/cugraph_dgl/utils/feature_storage.py deleted file mode 100644 index 31917661557..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/utils/feature_storage.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import annotations -from cugraph.gnn import FeatureStore -from cugraph.utilities.utils import import_optional - -torch = import_optional("torch") - - -class dgl_FeatureStorage: - """ - Storage for node/edge feature data. - """ - - def __init__(self, fs: FeatureStore, type_name: str, feat_name: str): - self.fs = fs - self.type_name = type_name - self.feat_name = feat_name - - def fetch(self, indices, device=None, pin_memory=False, **kwargs): - """Fetch the features of the given node/edge IDs to the - given device. 
- Parameters - ---------- - indices : Tensor - Node or edge IDs. - device : Device - Device context. - pin_memory : bool - Wether to use pin_memory for fetching features - pin_memory=True is currently not supported - - Returns - ------- - Tensor - Feature data stored in PyTorch Tensor. - """ - if pin_memory: - raise ValueError("pinned memory not supported in dgl_FeatureStorage") - if isinstance(indices, torch.Tensor): - indices = indices.long() - t = self.fs.get_data( - indices=indices, type_name=self.type_name, feat_name=self.feat_name - ) - if device: - return t.to(device) - else: - return t diff --git a/python/cugraph-dgl/cugraph_dgl/view.py b/python/cugraph-dgl/cugraph_dgl/view.py deleted file mode 100644 index 4de9406be07..00000000000 --- a/python/cugraph-dgl/cugraph_dgl/view.py +++ /dev/null @@ -1,346 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import warnings - -from collections import defaultdict -from collections.abc import MutableMapping -from typing import Union, Dict, List, Tuple - -from cugraph.utilities.utils import import_optional - -import cugraph_dgl -from cugraph_dgl.typing import TensorType -from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor - -torch = import_optional("torch") -dgl = import_optional("dgl") - - -class EmbeddingView: - def __init__(self, storage: "dgl.storages.base.FeatureStorage", ld: int): - self.__ld = ld - self.__storage = storage - - def __getitem__(self, u: TensorType) -> "torch.Tensor": - u = _cast_to_torch_tensor(u) - try: - return self.__storage.fetch( - u, - "cuda", - ) - except RuntimeError as ex: - warnings.warn( - "Got error accessing data, trying again with index on device: " - + str(ex) - ) - return self.__storage.fetch( - u.cuda(), - "cuda", - ) - - @property - def shape(self) -> "torch.Size": - try: - f = self.__storage.fetch(torch.tensor([0]), "cpu") - except RuntimeError: - f = self.__storage.fetch(torch.tensor([0], device="cuda"), "cuda") - sz = [s for s in f.shape] - sz[0] = self.__ld - return torch.Size(tuple(sz)) - - -class HeteroEdgeDataView(MutableMapping): - """ - Duck-typed version of DGL's HeteroEdgeDataView. - Used for accessing and modifying edge features. 
- """ - - def __init__( - self, - graph: "cugraph_dgl.Graph", - etype: Union[Tuple[str, str, str], List[Tuple[str, str, str]]], - edges: TensorType, - ): - self.__graph = graph - self.__etype = etype - self.__edges = edges - - @property - def _etype(self) -> Tuple[str, str, str]: - return self.__etype - - @property - def _graph(self) -> "cugraph_dgl.Graph": - return self.__graph - - @property - def _edges(self) -> TensorType: - return self.__edges - - def __getitem__(self, key: str): - if isinstance(self._etype, list): - return { - t: self._graph._get_e_emb(t, key, self._edges) - for t in self._etype - if self._graph._has_e_emb(t, key) - } - - return self._graph._get_e_emb(self._etype, key, self._edges) - - def __setitem__(self, key: str, val: Union[TensorType, Dict[str, TensorType]]): - if isinstance(self._etype, list): - if not isinstance(val, dict): - raise ValueError( - "There are multiple edge types in this view. " - "Expected a dictionary of values." - ) - for t, v in val.items(): - if t not in self._etype: - raise ValueError("Attempted to modify a type out of view.") - self._graph.set_e_emb(t, self._edges, {key: v}) - else: - if isinstance(val, dict): - raise ValueError( - "There is only one edge type in this view. " - "Expected a single tensor." 
- ) - self._graph.set_e_emb(self._etype, self._edges, {key: v}) - - def __delitem__(self, key: str): - if isinstance(self._etype, list): - for t in self._etype: - self._graph.pop_e_emb(t, key) - else: - self._graph.pop_e_emb(self._etype, key) - - def _transpose(self, fetch_vals=True): - if isinstance(self._etype, list): - tr = defaultdict(dict) - for etype in self._etype: - for key in self._graph._get_e_emb_keys(etype): - tr[key][etype] = ( - self._graph._get_e_emb(etype, key, self._edges) - if fetch_vals - else [] - ) - else: - tr = {} - for key in self._graph._get_e_emb_keys(self._etype): - tr[key] = ( - self._graph._get_e_emb(self._etype, key, self._edges) - if fetch_vals - else [] - ) - - return tr - - def __len__(self): - return len(self._transpose(fetch_vals=False)) - - def __iter__(self): - return iter(self._transpose()) - - def keys(self): - return self._transpose(fetch_vals=False).keys() - - def values(self): - return self._transpose().values() - - def __repr__(self): - return repr(self._transpose(fetch_vals=False)) - - -class HeteroNodeDataView(MutableMapping): - """ - Duck-typed version of DGL's HeteroNodeDataView. - Used for accessing and modifying node features. 
- """ - - def __init__( - self, - graph: "cugraph_dgl.Graph", - ntype: Union[str, List[str]], - nodes: TensorType, - ): - self.__graph = graph - self.__ntype = ntype - self.__nodes = nodes - - @property - def _ntype(self) -> str: - return self.__ntype - - @property - def _graph(self) -> "cugraph_dgl.Graph": - return self.__graph - - @property - def _nodes(self) -> TensorType: - return self.__nodes - - def __getitem__(self, key: str): - if isinstance(self._ntype, list): - return { - t: self._graph._get_n_emb(t, key, self._nodes) - for t in self._ntype - if self._graph._has_n_emb(t, key) - } - else: - return self._graph._get_n_emb(self._ntype, key, self._nodes) - - def __setitem__(self, key: str, val: Union[TensorType, Dict[str, TensorType]]): - if isinstance(self._ntype, list): - if not isinstance(val, dict): - raise ValueError( - "There are multiple node types in this view. " - "Expected a dictionary of values." - ) - for t, v in val.items(): - if t not in self._ntype: - raise ValueError("Attempted to modify a type out of view.") - self._graph._set_n_emb(t, self._nodes, {key: v}) - else: - if isinstance(val, dict): - raise ValueError( - "There is only one node type in this view. " - "Expected a single value tensor." 
- ) - self._graph._set_n_emb(self._ntype, self._nodes, {key: val}) - - def __delitem__(self, key: str): - if isinstance(self._ntype, list): - for t in self._ntype: - self._graph._pop_n_emb(t, key) - else: - self._graph.pop_n_emb(self._ntype, key) - - def _transpose(self, fetch_vals=True): - if isinstance(self._ntype, list): - tr = defaultdict(dict) - for ntype in self._ntype: - for key in self._graph._get_n_emb_keys(ntype): - tr[key][ntype] = ( - self._graph._get_n_emb(ntype, key, self._nodes) - if fetch_vals - else [] - ) - else: - tr = {} - for key in self._graph._get_n_emb_keys(self._ntype): - tr[key] = ( - self._graph._get_n_emb(self._ntype, key, self._nodes) - if fetch_vals - else [] - ) - - return tr - - def __len__(self): - return len(self._transpose(fetch_vals=False)) - - def __iter__(self): - return iter(self._transpose()) - - def keys(self): - return self._transpose(fetch_vals=False).keys() - - def values(self): - return self._transpose().values() - - def __repr__(self): - return repr(self._transpose(fetch_vals=False)) - - -class HeteroEdgeView: - """ - Duck-typed version of DGL's HeteroEdgeView. 
- """ - - def __init__(self, graph): - self.__graph = graph - - @property - def _graph(self) -> "cugraph_dgl.Graph": - return self.__graph - - def __getitem__(self, key): - if isinstance(key, slice): - if not (key.start is None and key.stop is None and key.stop is None): - raise ValueError("Only full slices are supported in DGL.") - edges = dgl.base.ALL - etype = None - elif key is None: - edges = dgl.base.ALL - etype = None - elif isinstance(key, tuple): - if len(key) == 3: - edges = dgl.base.ALL - etype = key - else: - edges = key - etype = None - elif isinstance(key, str): - edges = dgl.base.ALL - etype = key - else: - edges = key - etype = None - - return HeteroEdgeDataView( - graph=self.__graph, - etype=etype, - edges=edges, - ) - - def __call__(self, *args, **kwargs): - if "device" in kwargs: - return self.__graph.all_edges(*args, **kwargs) - - return self.__graph.all_edges(*args, **kwargs, device="cuda") - - -class HeteroNodeView: - """ - Duck-typed version of DGL's HeteroNodeView. 
- """ - - def __init__(self, graph: "cugraph_dgl.Graph"): - self.__graph = graph - - @property - def _graph(self) -> "cugraph_dgl.Graph": - return self.__graph - - def __getitem__(self, key): - if isinstance(key, slice): - if not (key.start is None and key.stop is None and key.stop is None): - raise ValueError("Only full slices are supported in DGL.") - nodes = dgl.base.ALL - ntype = None - elif isinstance(key, tuple): - nodes, ntype = key - elif key is None or isinstance(key, str): - nodes = dgl.base.ALL - ntype = key - else: - nodes = key - ntype = None - - return HeteroNodeDataView(graph=self.__graph, ntype=ntype, nodes=nodes) - - def __call__(self, ntype=None): - return torch.arange( - 0, self.__graph.num_nodes(ntype), dtype=self.__graph.idtype, device="cuda" - ) diff --git a/python/cugraph-dgl/examples/dataset_from_disk_cudf.ipynb b/python/cugraph-dgl/examples/dataset_from_disk_cudf.ipynb deleted file mode 100644 index 15708f5dea6..00000000000 --- a/python/cugraph-dgl/examples/dataset_from_disk_cudf.ipynb +++ /dev/null @@ -1,269 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "bc952178-34c0-4f13-9003-478d4aa8cd4d", - "metadata": {}, - "source": [ - "# Testing Notebook for cugraph DGL vs DGL Upstream" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "d92a81b3-50ac-42ff-97e0-d636945f1f80", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import os\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"4\"\n", - "import cudf\n", - "import rmm\n", - "import torch\n", - "from rmm.allocators.torch import rmm_torch_allocator\n", - "rmm.reinitialize(initial_pool_size=15e9)\n", - "#Switch to async pool in case of memory issues due to fragmentation of the pool\n", - "#rmm.mr.set_current_device_resource(rmm.mr.CudaAsyncMemoryResource(initial_pool_size=15e9))\n", - "torch.cuda.memory.change_current_allocator(rmm_torch_allocator)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": 
"f304a5dd-1465-4054-846f-2308a19153fa", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "single_gpu = True" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "b6f899ee", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def load_dgl_dataset(dataset_name='ogbn-products'):\n", - " from ogb.nodeproppred import DglNodePropPredDataset\n", - " dataset_root = '/raid/vjawa/gnn/'\n", - " dataset = DglNodePropPredDataset(name = dataset_name, root=dataset_root)\n", - " split_idx = dataset.get_idx_split()\n", - " train_idx, valid_idx, test_idx = split_idx[\"train\"], split_idx[\"valid\"], split_idx[\"test\"]\n", - " g, label = dataset[0]\n", - " g.ndata['label'] = label\n", - " g = g.add_self_loop()\n", - " g = g.to('cpu')\n", - " return g, train_idx" - ] - }, - { - "cell_type": "markdown", - "id": "fdd59d3a-0c1d-425f-a337-34b09c675622", - "metadata": {}, - "source": [ - "# cuGraph DGL DataLoader" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "e1e84844-634e-451e-be74-939f9477562f", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import cugraph_dgl\n", - "import tempfile" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "3808055c-2d7d-4cc7-b1bd-2fe9edd6eb95", - "metadata": {}, - "outputs": [], - "source": [ - "!rm -rf \"/raid/vjawa/obgn_products_sampling/\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "eff3d77b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "g, train_idx = load_dgl_dataset()\n", - "g = cugraph_dgl.cugraph_storage_from_heterograph(g, single_gpu=single_gpu)\n", - "\n", - "batch_size = 1024*2\n", - "fanout_vals=[25, 25]\n", - "sampler = cugraph_dgl.dataloading.NeighborSampler(fanout_vals)\n", - "dataloader = cugraph_dgl.dataloading.DataLoader(\n", - " g, \n", - " train_idx.to('cuda'), # train_nid must be on GPU.\n", - " sampler,\n", - " sampling_output_dir=\"/raid/vjawa/obgn_products_sampling/\", # Path 
to save sampling results to, Change to the fastest IO path available\n", - " device=torch.device('cuda'), # The device argument must be GPU.\n", - " num_workers=0, # Number of workers must be 0.\n", - " batch_size=batch_size,\n", - " batches_per_partition=50,\n", - " seeds_per_call=50*batch_size,\n", - " drop_last=False,\n", - " shuffle=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "94003c30-756f-4cdb-856a-dec16a5fb4dc", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7.08 s ± 596 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%%timeit\n", - "batch_stats = {}\n", - "for batch_id,(input_nodes, output_nodes, blocks) in enumerate(dataloader):\n", - " batch_stats[batch_id]={'input_nodes':len(input_nodes),'output_nodes':len(output_nodes)}" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d8488e64-ba92-40c6-8e76-3898b1ca4317", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "del dataloader\n", - "del g" - ] - }, - { - "cell_type": "markdown", - "id": "b0a17523-53e9-4780-a9e1-eac4edd464e5", - "metadata": {}, - "source": [ - "# Pure DGL DataLoader" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "0d147756-6410-4b71-aac1-9ef1e3df8fff", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from dgl.dataloading import DataLoader, NeighborSampler\n", - "import dgl" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "7cb2cc68-b4ff-43f2-8b12-b2808510b3f2", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "g, train_idx = load_dgl_dataset()\n", - "batch_size = 1024*2\n", - "fanout_vals = [25, 25]\n", - "sampler = dgl.dataloading.MultiLayerNeighborSampler(fanout_vals)\n", - "dataloader = dgl.dataloading.DataLoader(\n", - " g, \n", - " train_idx.to(g.device), # train_nid must be on GPU.\n", - " sampler,\n", - " device=torch.device('cuda'), # 
The device argument must be GPU.\n", - " num_workers=0, # Number of workers must be 0.\n", - " use_uva=False,\n", - " batch_size=batch_size,\n", - " drop_last=False,\n", - " shuffle=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "7988aca2-7bfb-4200-ac87-008e30c670fb", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7.34 s ± 353 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], - "source": [ - "%%timeit\n", - "dgl_batch_stats = {}\n", - "for batch_id,(input_nodes, output_nodes, blocks) in enumerate(dataloader):\n", - " dgl_batch_stats[batch_id]={'input_nodes':len(input_nodes),'output_nodes':len(output_nodes)}" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "57022ea6-d2fc-4334-a086-82201e8814c8", - "metadata": {}, - "outputs": [], - "source": [ - "del dataloader\n", - "del g" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - }, - "vscode": { - "interpreter": { - "hash": "a1325b9b48ed9084674a30242e696fec2a1a44bbc4c0ef7ed1d4392854f3d402" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/python/cugraph-dgl/examples/graphsage/README.MD b/python/cugraph-dgl/examples/graphsage/README.MD deleted file mode 100644 index ca867f0b634..00000000000 --- a/python/cugraph-dgl/examples/graphsage/README.MD +++ /dev/null @@ -1,26 +0,0 @@ -Inductive Representation Learning on Large Graphs (GraphSAGE) -============ - -- Paper link: 
[http://papers.nips.cc/paper/6703-inductive-representation-learning-on-large-graphs.pdf](http://papers.nips.cc/paper/6703-inductive-representation-learning-on-large-graphs.pdf) -- Author's code repo: [https://github.com/williamleif/graphsage-simple](https://github.com/williamleif/graphsage-simple) - -For advanced usages, including training with multi-gpu/multi-node, and PyTorch Lightning, etc., more examples can be found in [advanced](https://github.com/dmlc/dgl/tree/master/examples/pytorch/graphsage/advanced) and [dist](https://github.com/dmlc/dgl/tree/master/examples/pytorch/graphsage/dist) directory. - -Requirements ------------- - -```bash -mamba install ogb torchmetrics -c conda-forge -``` - -How to run -------- - - -### Minibatch training for node classification - -Train w/ mini-batch sampling with cugraph_storage backend for node classification on "ogbn-products" - -```bash -python3 node_classification.py --mode=gpu_cugraph_dgl -``` diff --git a/python/cugraph-dgl/examples/graphsage/node-classification-dask.py b/python/cugraph-dgl/examples/graphsage/node-classification-dask.py deleted file mode 100644 index 0481f9566bc..00000000000 --- a/python/cugraph-dgl/examples/graphsage/node-classification-dask.py +++ /dev/null @@ -1,272 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -# Example modified from: -# https://github.com/dmlc/dgl/blob/master/examples/pytorch/graphsage/node_classification.py - -# Ignore Warning -import warnings -import time -import cugraph_dgl -import torch -import torch.nn as nn -import torch.nn.functional as F -import torchmetrics.functional as MF -import dgl -import dgl.nn as dglnn -from dgl.data import AsNodePredDataset -from dgl.dataloading import ( - DataLoader, - NeighborSampler, - MultiLayerFullNeighborSampler, -) -from ogb.nodeproppred import DglNodePropPredDataset -import tqdm -import argparse - -warnings.filterwarnings("ignore") - - -def set_allocators(): - import rmm - import cudf - import cupy - from rmm.allocators.torch import rmm_torch_allocator - from rmm.allocators.cupy import rmm_cupy_allocator - - mr = rmm.mr.CudaAsyncMemoryResource() - rmm.mr.set_current_device_resource(mr) - torch.cuda.memory.change_current_allocator(rmm_torch_allocator) - cupy.cuda.set_allocator(rmm_cupy_allocator) - cudf.set_option("spill", True) - - -class SAGE(nn.Module): - def __init__(self, in_size, hid_size, out_size): - super().__init__() - self.layers = nn.ModuleList() - # three-layer GraphSAGE-mean - self.layers.append(dglnn.SAGEConv(in_size, hid_size, "mean")) - self.layers.append(dglnn.SAGEConv(hid_size, hid_size, "mean")) - self.layers.append(dglnn.SAGEConv(hid_size, out_size, "mean")) - self.dropout = nn.Dropout(0.5) - self.hid_size = hid_size - self.out_size = out_size - - def forward(self, blocks, x): - h = x - for l_id, (layer, block) in enumerate(zip(self.layers, blocks)): - h = layer(block, h) - if l_id != len(self.layers) - 1: - h = F.relu(h) - h = self.dropout(h) - return h - - def inference(self, g, device, batch_size): - """Conduct layer-wise inference to get all the node embeddings.""" - all_node_ids = torch.arange(0, g.num_nodes()).to(device) - feat = g.get_node_storage(key="feat", ntype="_N").fetch( - all_node_ids, device=device - ) - - sampler = MultiLayerFullNeighborSampler(1, 
prefetch_node_feats=["feat"]) - dataloader = DataLoader( - g, - torch.arange(g.num_nodes()).to(g.device), - sampler, - device=device, - batch_size=batch_size, - shuffle=False, - drop_last=False, - num_workers=0, - ) - buffer_device = torch.device("cpu") - pin_memory = buffer_device != device - - for l_id, layer in enumerate(self.layers): - y = torch.empty( - g.num_nodes(), - self.hid_size if l_id != len(self.layers) - 1 else self.out_size, - device=buffer_device, - pin_memory=pin_memory, - ) - feat = feat.to(device) - for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader): - x = feat[input_nodes] - h = layer(blocks[0], x) # len(blocks) = 1 - if l_id != len(self.layers) - 1: - h = F.relu(h) - h = self.dropout(h) - # by design, our output nodes are contiguous - y[output_nodes[0] : output_nodes[-1] + 1] = h.to(buffer_device) - feat = y - return y - - -def evaluate(model, graph, dataloader): - model.eval() - ys = [] - y_hats = [] - for it, (input_nodes, output_nodes, blocks) in enumerate(dataloader): - with torch.no_grad(): - if isinstance(graph.ndata["feat"], dict): - x = graph.ndata["feat"]["_N"][input_nodes] - label = graph.ndata["label"]["_N"][output_nodes] - else: - x = graph.ndata["feat"][input_nodes] - label = graph.ndata["label"][output_nodes] - ys.append(label) - y_hats.append(model(blocks, x)) - num_classes = y_hats[0].shape[1] - return MF.accuracy( - torch.cat(y_hats), - torch.cat(ys), - task="multiclass", - num_classes=num_classes, - ) - - -def layerwise_infer(device, graph, nid, model, batch_size): - model.eval() - with torch.no_grad(): - pred = model.inference(graph, device, batch_size) # pred in buffer_device - pred = pred[nid] - label = graph.ndata["label"] - if isinstance(label, dict): - label = label["_N"] - label = label[nid].to(device).to(pred.device) - num_classes = pred.shape[1] - return MF.accuracy(pred, label, task="multiclass", num_classes=num_classes) - - -def train(args, device, g, dataset, model): - # create sampler & dataloader - 
train_idx = dataset.train_idx.to(device) - val_idx = dataset.val_idx.to(device) - - use_uva = args.mode == "mixed" - batch_size = 1024 - fanouts = [5, 10, 15] - sampler = NeighborSampler(fanouts) - train_dataloader = DataLoader( - g, - train_idx, - sampler, - device=device, - batch_size=batch_size, - shuffle=True, - drop_last=False, - num_workers=0, - use_uva=use_uva, - ) - val_dataloader = DataLoader( - g, - val_idx, - sampler, - device=device, - batch_size=batch_size, - shuffle=True, - drop_last=False, - num_workers=0, - use_uva=use_uva, - ) - - opt = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4) - - for epoch in range(10): - model.train() - total_loss = 0 - st = time.time() - for it, (input_nodes, output_nodes, blocks) in enumerate(train_dataloader): - if isinstance(g.ndata["feat"], dict): - x = g.ndata["feat"]["_N"][input_nodes] - y = g.ndata["label"]["_N"][output_nodes] - else: - x = g.ndata["feat"][input_nodes] - y = g.ndata["label"][output_nodes] - - y_hat = model(blocks, x) - loss = F.cross_entropy(y_hat, y) - opt.zero_grad() - loss.backward() - opt.step() - total_loss += loss.item() - - et = time.time() - - print( - f"Time taken for epoch {epoch} with batch_size {batch_size} = {et - st} s" - ) - acc = evaluate(model, g, val_dataloader) - print( - "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format( - epoch, total_loss / (it + 1), acc.item() - ) - ) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--mode", - default="gpu_cugraph_dgl", - choices=["cpu", "mixed", "gpu_dgl", "gpu_cugraph_dgl"], - help="Training mode." 
- " 'cpu' for CPU training," - " 'mixed' for CPU-GPU mixed training, " - " 'gpu_dgl' for pure-GPU training, " - " 'gpu_cugraph_dgl' for pure-GPU training.", - ) - args = parser.parse_args() - if not torch.cuda.is_available(): - args.mode = "cpu" - if args.mode == "gpu_cugraph_dgl": - set_allocators() - print(f"Training in {args.mode} mode.") - - # load and preprocess dataset - print("Loading data") - dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products")) - g = dataset[0] - g = dgl.add_self_loop(g) - if args.mode == "gpu_cugraph_dgl": - g = cugraph_dgl.cugraph_storage_from_heterograph(g.to("cuda")) - del dataset.g - - else: - g = g.to("cuda" if args.mode == "gpu_dgl" else "cpu") - device = torch.device( - "cpu" if args.mode == "cpu" or args.mode == "mixed" else "cuda" - ) - - # create GraphSAGE model - feat_shape = ( - g.get_node_storage(key="feat", ntype="_N") - .fetch(torch.LongTensor([0]).to(device), device=device) - .shape[1] - ) - print(feat_shape) - # no ndata in cugraph storage object - in_size = feat_shape - out_size = dataset.num_classes - model = SAGE(in_size, 256, out_size).to(device) - - # model training - print("Training...") - train(args, device, g, dataset, model) - - # test the model - print("Testing...") - acc = layerwise_infer(device, g, dataset.test_idx, model, batch_size=4096) - print("Test Accuracy {:.4f}".format(acc.item())) diff --git a/python/cugraph-dgl/examples/graphsage/node-classification.py b/python/cugraph-dgl/examples/graphsage/node-classification.py deleted file mode 100644 index 56ac41c09b4..00000000000 --- a/python/cugraph-dgl/examples/graphsage/node-classification.py +++ /dev/null @@ -1,283 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# Example modified from: -# https://github.com/dmlc/dgl/blob/master/examples/pytorch/graphsage/node_classification.py - -# Ignore Warning -import warnings -import tempfile -import time -import cugraph_dgl -import cugraph_dgl.dataloading -import torch -import torch.nn as nn -import torch.nn.functional as F -import torchmetrics.functional as MF -import dgl -import dgl.nn as dglnn -from dgl.data import AsNodePredDataset -from dgl.dataloading import ( - DataLoader, - NeighborSampler, - MultiLayerFullNeighborSampler, -) -from ogb.nodeproppred import DglNodePropPredDataset -import tqdm -import argparse - -warnings.filterwarnings("ignore") - - -def set_allocators(): - import rmm - import cudf - import cupy - from rmm.allocators.torch import rmm_torch_allocator - from rmm.allocators.cupy import rmm_cupy_allocator - - mr = rmm.mr.CudaAsyncMemoryResource() - rmm.mr.set_current_device_resource(mr) - torch.cuda.memory.change_current_allocator(rmm_torch_allocator) - cupy.cuda.set_allocator(rmm_cupy_allocator) - cudf.set_option("spill", True) - - -class SAGE(nn.Module): - def __init__(self, in_size, hid_size, out_size): - super().__init__() - self.layers = nn.ModuleList() - # three-layer GraphSAGE-mean - self.layers.append(dglnn.SAGEConv(in_size, hid_size, "mean")) - self.layers.append(dglnn.SAGEConv(hid_size, hid_size, "mean")) - self.layers.append(dglnn.SAGEConv(hid_size, out_size, "mean")) - self.dropout = nn.Dropout(0.5) - self.hid_size = hid_size - self.out_size = out_size - - def forward(self, blocks, x): - h = x - for l_id, (layer, block) in 
enumerate(zip(self.layers, blocks)): - h = layer(block, h) - if l_id != len(self.layers) - 1: - h = F.relu(h) - h = self.dropout(h) - return h - - def inference(self, g, device, batch_size): - """Conduct layer-wise inference to get all the node embeddings.""" - all_node_ids = torch.arange(0, g.num_nodes()).to(device) - feat = g.ndata["feat"][all_node_ids].to(device) - - if isinstance(g, cugraph_dgl.Graph): - sampler = cugraph_dgl.dataloading.NeighborSampler([-1]) - loader_cls = cugraph_dgl.dataloading.FutureDataLoader - else: - sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=["feat"]) - loader_cls = DataLoader - dataloader = loader_cls( - g, - torch.arange(g.num_nodes()).to(device), - sampler, - device=device, - batch_size=batch_size, - shuffle=False, - drop_last=False, - num_workers=0, - ) - buffer_device = torch.device("cpu") - pin_memory = buffer_device != device - - for l_id, layer in enumerate(self.layers): - y = torch.empty( - g.num_nodes(), - self.hid_size if l_id != len(self.layers) - 1 else self.out_size, - device=buffer_device, - pin_memory=pin_memory, - ) - feat = feat.to(device) - for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader): - x = feat[input_nodes] - h = layer(blocks[0], x) # len(blocks) = 1 - if l_id != len(self.layers) - 1: - h = F.relu(h) - h = self.dropout(h) - # by design, our output nodes are contiguous - y[output_nodes[0] : output_nodes[-1] + 1] = h.to(buffer_device) - feat = y - return y - - -def evaluate(model, graph, dataloader): - model.eval() - ys = [] - y_hats = [] - for it, (input_nodes, output_nodes, blocks) in enumerate(dataloader): - with torch.no_grad(): - if isinstance(graph.ndata["feat"], dict): - x = graph.ndata["feat"]["_N"][input_nodes] - label = graph.ndata["label"]["_N"][output_nodes] - else: - x = graph.ndata["feat"][input_nodes] - label = graph.ndata["label"][output_nodes] - ys.append(label) - y_hats.append(model(blocks, x)) - num_classes = y_hats[0].shape[1] - return MF.accuracy( - 
torch.cat(y_hats), - torch.cat(ys), - task="multiclass", - num_classes=num_classes, - ) - - -def layerwise_infer(device, graph, nid, model, batch_size): - model.eval() - with torch.no_grad(): - pred = model.inference(graph, device, batch_size) # pred in buffer_device - pred = pred[nid] - label = graph.ndata["label"] - if isinstance(label, dict): - label = label["_N"] - label = label[nid].to(device).to(pred.device) - num_classes = pred.shape[1] - return MF.accuracy(pred, label, task="multiclass", num_classes=num_classes) - - -def train(args, device, g, dataset, model, directory): - # create sampler & dataloader - train_idx = dataset.train_idx.to(device) - val_idx = dataset.val_idx.to(device) - - use_uva = args.mode == "mixed" - batch_size = 1024 - fanouts = [5, 10, 15] - if isinstance(g, cugraph_dgl.Graph): - sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts, directory=directory) - loader_cls = cugraph_dgl.dataloading.FutureDataLoader - else: - sampler = NeighborSampler(fanouts) - loader_cls = DataLoader - train_dataloader = loader_cls( - g, - train_idx, - sampler, - device=device, - batch_size=batch_size, - shuffle=True, - drop_last=False, - num_workers=0, - use_uva=use_uva, - ) - val_dataloader = loader_cls( - g, - val_idx, - sampler, - device=device, - batch_size=batch_size, - shuffle=True, - drop_last=False, - num_workers=0, - use_uva=use_uva, - ) - - opt = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4) - - for epoch in range(10): - model.train() - total_loss = 0 - st = time.time() - for it, (input_nodes, output_nodes, blocks) in enumerate(train_dataloader): - if isinstance(g.ndata["feat"], dict): - x = g.ndata["feat"]["_N"][input_nodes] - y = g.ndata["label"]["_N"][output_nodes] - else: - x = g.ndata["feat"][input_nodes] - y = g.ndata["label"][output_nodes] - - y_hat = model(blocks, x) - loss = F.cross_entropy(y_hat, y) - opt.zero_grad() - loss.backward() - opt.step() - total_loss += loss.item() - - et = time.time() - - print( - f"Time 
taken for epoch {epoch} with batch_size {batch_size} = {et - st} s" - ) - acc = evaluate(model, g, val_dataloader) - print( - "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format( - epoch, total_loss / (it + 1), acc.item() - ) - ) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--mode", - default="gpu_cugraph_dgl", - choices=["cpu", "mixed", "gpu_dgl", "gpu_cugraph_dgl"], - help="Training mode." - " 'cpu' for CPU training," - " 'mixed' for CPU-GPU mixed training, " - " 'gpu_dgl' for pure-GPU training, " - " 'gpu_cugraph_dgl' for pure-GPU training.", - ) - parser.add_argument("--dataset_root", type=str, default="dataset") - parser.add_argument("--tempdir_root", type=str, default=None) - args = parser.parse_args() - if not torch.cuda.is_available(): - args.mode = "cpu" - if args.mode == "gpu_cugraph_dgl": - set_allocators() - print(f"Training in {args.mode} mode.") - - # load and preprocess dataset - print("Loading data") - dataset = AsNodePredDataset( - DglNodePropPredDataset("ogbn-products", root=args.dataset_root) - ) - g = dataset[0] - g = dgl.add_self_loop(g) - if args.mode == "gpu_cugraph_dgl": - g = cugraph_dgl.cugraph_dgl_graph_from_heterograph(g.to("cuda")) - del dataset.g - - else: - g = g.to("cuda" if args.mode == "gpu_dgl" else "cpu") - device = torch.device( - "cpu" if args.mode == "cpu" or args.mode == "mixed" else "cuda" - ) - - # create GraphSAGE model - feat_shape = g.ndata["feat"].shape[1] - print(feat_shape) - - in_size = feat_shape - out_size = dataset.num_classes - model = SAGE(in_size, 256, out_size).to(device) - - # model training - print("Training...") - with tempfile.TemporaryDirectory(dir=args.tempdir_root) as directory: - train(args, device, g, dataset, model, directory) - - # test the model - print("Testing...") - acc = layerwise_infer(device, g, dataset.test_idx, model, batch_size=4096) - print("Test Accuracy {:.4f}".format(acc.item())) diff --git 
a/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py deleted file mode 100644 index 3e0c0454905..00000000000 --- a/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# A graphsage GNN model using dgl for node classification -# with three layers and mean aggregation -import time -import dgl -import torch -import torch.nn as nn -import torch.nn.functional as F -import torchmetrics.functional as MF -from cugraph_dgl.nn import SAGEConv -import tqdm - - -class Sage(nn.Module): - def __init__(self, in_size, hid_size, out_size): - super().__init__() - self.layers = nn.ModuleList() - # 2-layer GraphSAGE-mean - self.layers.append(SAGEConv(in_size, hid_size, "mean")) - self.layers.append(SAGEConv(hid_size, out_size, "mean")) - self.dropout = nn.Dropout(0.5) - self.hid_size = hid_size - self.out_size = out_size - - def forward(self, blocks, x): - h = x - for l_id, (layer, block) in enumerate(zip(self.layers, blocks)): - h = layer(block, h) - if l_id != len(self.layers) - 1: - h = F.relu(h) - h = self.dropout(h) - return h - - def inference(self, g, batch_size, device): - """ - Inference with the GraphSAGE model on - full neighbors (i.e. without neighbor sampling). - g : the entire graph. 
- batch_size : the node number of each inference output - device : the inference device - """ - # During inference with sampling, - # multi-layer blocks are very inefficient because - # lots of computations in the first few layers are repeated. - # Therefore, we compute the representation of all nodes layer by layer. - # The nodes on each layer are of course splitted in batches. - - all_node_ids = torch.arange(0, g.num_nodes()).to(device) - feat = g.ndata["feat"][all_node_ids].to(device) - - sampler = dgl.dataloading.MultiLayerFullNeighborSampler( - 1, prefetch_node_feats=["feat"] - ) - dataloader = dgl.dataloading.DataLoader( - g, - torch.arange(g.num_nodes(), dtype=torch.int32).to(g.device), - sampler, - device=device, - batch_size=batch_size, - shuffle=False, - drop_last=False, - num_workers=0, - ) - buffer_device = torch.device("cpu") - pin_memory = buffer_device != device - - for l_id, layer in enumerate(self.layers): - y = torch.empty( - g.num_nodes(), - self.hid_size if l_id != len(self.layers) - 1 else self.out_size, - device=buffer_device, - pin_memory=pin_memory, - ) - feat = feat.to(device) - for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader): - x = feat[input_nodes] - h = layer(blocks[0], x) # len(blocks) = 1 - if l_id != len(self.layers) - 1: - h = F.relu(h) - h = self.dropout(h) - # by design, our output nodes are contiguous - y[output_nodes[0] : output_nodes[-1] + 1] = h.to(buffer_device) - feat = y - return y - - -def layerwise_infer(graph, nid, model, batch_size, device): - model.eval() - with torch.no_grad(): - pred = model.module.inference( - graph, batch_size, device - ) # pred in buffer_device - pred = pred[nid] - label = graph.ndata["label"] - if isinstance(label, dict): - label = label["_N"] - label = label[nid].to(pred.device) - num_classes = pred.shape[1] - label = label.squeeze(1) - return MF.accuracy(pred, label, task="multiclass", num_classes=num_classes) - - -def train_model(model, g, opt, train_dataloader, num_epochs, rank, 
val_nid): - st = time.time() - model.train() - for epoch in range(num_epochs): - total_loss = 0 - for _, (input_nodes, output_nodes, blocks) in enumerate(train_dataloader): - x = g.ndata["feat"][input_nodes].to(torch.float32) - y = g.ndata["label"][output_nodes].to(torch.int64) - y_hat = model(blocks, x) - y = y.squeeze(1) - loss = F.cross_entropy(y_hat, y) - opt.zero_grad() - loss.backward() - opt.step() - total_loss += loss.item() - print( - f"total loss: {total_loss} for epoch = {epoch} for rank = {rank}", - flush=True, - ) - et = time.time() - print( - f"Total time taken for num_epochs {num_epochs} " - f"with batch_size {train_dataloader._batch_size} = {et - st} s on rank ={rank}" - ) - if rank == 0: - val_acc = layerwise_infer(g, val_nid, model, 1024 * 5, "cuda") - print("---" * 30) - print("Validation Accuracy {:.4f}".format(val_acc)) diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py deleted file mode 100644 index 11afe466014..00000000000 --- a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py +++ /dev/null @@ -1,311 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import dgl -import torch -import time -import tempfile -import argparse -import json -import os -import warnings - -from datetime import timedelta - -import cugraph_dgl - -from cugraph.gnn import ( - cugraph_comms_init, - cugraph_comms_shutdown, - cugraph_comms_create_unique_id, -) - -from pylibwholegraph.torch.initialize import ( - init as wm_init, - finalize as wm_finalize, -) - -# Allow computation on objects that are larger than GPU memory -# https://docs.rapids.ai/api/cudf/stable/developer_guide/library_design/#spilling-to-host-memory -os.environ["CUDF_SPILL"] = "1" - - -def init_ddp_worker(global_rank, local_rank, world_size, cugraph_id): - import rmm - - rmm.reinitialize( - devices=local_rank, - managed_memory=True, - pool_allocator=True, - ) - - import cupy - - cupy.cuda.Device(local_rank).use() - from rmm.allocators.cupy import rmm_cupy_allocator - - cupy.cuda.set_allocator(rmm_cupy_allocator) - - from cugraph.testing.mg_utils import enable_spilling - - enable_spilling() - - torch.cuda.set_device(local_rank) - - cugraph_comms_init( - rank=global_rank, world_size=world_size, uid=cugraph_id, device=local_rank - ) - - wm_init(global_rank, world_size, local_rank, torch.cuda.device_count()) - - -def load_dgl_dataset(dataset_root="dataset", dataset_name="ogbn-products"): - from ogb.nodeproppred import DglNodePropPredDataset - - dataset = DglNodePropPredDataset(root=dataset_root, name=dataset_name) - split_idx = dataset.get_idx_split() - train_idx, valid_idx, test_idx = ( - split_idx["train"], - split_idx["valid"], - split_idx["test"], - ) - g, label = dataset[0] - g.ndata["label"] = label - if len(g.etypes) <= 1: - g = dgl.add_self_loop(g) - else: - for etype in g.etypes: - if etype[0] == etype[2]: - # only add self loops for src->dst - g = dgl.add_self_loop(g, etype=etype) - - g = g.int() - idx = { - "train": train_idx.int(), - "valid": valid_idx.int(), - "test": test_idx.int(), - } - - return g, idx, dataset.num_classes - - -def partition_data( - g, 
split_idx, num_classes, edge_path, feature_path, label_path, meta_path -): - # Split and save edge index - os.makedirs( - edge_path, - exist_ok=True, - ) - src, dst = g.all_edges(form="uv", order="eid") - edge_index = torch.stack([src, dst]) - for (r, e) in enumerate(torch.tensor_split(edge_index, world_size, dim=1)): - rank_path = os.path.join(edge_path, f"rank={r}.pt") - torch.save( - e.clone(), - rank_path, - ) - - # Split and save features - os.makedirs( - feature_path, - exist_ok=True, - ) - - nix = torch.arange(g.num_nodes()) - for (r, f) in enumerate(torch.tensor_split(nix, world_size)): - feat_path = os.path.join(feature_path, f"rank={r}_feat.pt") - torch.save(g.ndata["feat"][f], feat_path) - - label_f_path = os.path.join(feature_path, f"rank={r}_label.pt") - torch.save(g.ndata["label"][f], label_f_path) - - # Split and save labels - os.makedirs( - label_path, - exist_ok=True, - ) - for (d, i) in split_idx.items(): - i_parts = torch.tensor_split(i, world_size) - for r, i_part in enumerate(i_parts): - rank_path = os.path.join(label_path, f"rank={r}") - os.makedirs(rank_path, exist_ok=True) - torch.save(i_part, os.path.join(rank_path, f"{d}.pt")) - - # Save metadata - meta = { - "num_classes": int(num_classes), - "num_nodes": int(g.num_nodes()), - } - with open(meta_path, "w") as f: - json.dump(meta, f) - - -def load_partitioned_data(rank, edge_path, feature_path, label_path, meta_path): - g = cugraph_dgl.Graph( - is_multi_gpu=True, ndata_storage="wholegraph", edata_storage="wholegraph" - ) - - # Load metadata - with open(meta_path, "r") as f: - meta = json.load(f) - - # Load labels - split_idx = {} - for split in ["train", "test", "valid"]: - split_idx[split] = torch.load( - os.path.join(label_path, f"rank={rank}", f"{split}.pt") - ) - - # Load features - feat_t = torch.load(os.path.join(feature_path, f"rank={rank}_feat.pt")) - label_f_t = torch.load(os.path.join(feature_path, f"rank={rank}_label.pt")) - ndata = {"feat": feat_t, "label": label_f_t} - 
g.add_nodes(meta["num_nodes"], data=ndata) - - # Load edge index - src, dst = torch.load(os.path.join(edge_path, f"rank={rank}.pt")) - g.add_edges(src.cuda(), dst.cuda(), data=None) - - return g, split_idx, meta["num_classes"] - - -def create_dataloader(gs, train_idx, device, temp_dir, stage): - import cugraph_dgl - - temp_path = os.path.join(temp_dir, f"{stage}_{device}") - os.mkdir(temp_path) - - sampler = cugraph_dgl.dataloading.NeighborSampler( - [10, 20], - directory=temp_path, - batches_per_partition=10, - ) - - dataloader = cugraph_dgl.dataloading.FutureDataLoader( - gs, - train_idx, - sampler, - device=device, # Put the sampled MFGs on CPU or GPU - use_ddp=True, # Make it work with distributed data parallel - batch_size=1024, - shuffle=False, # Whether to shuffle the nodes for every epoch - drop_last=False, - num_workers=0, - ) - return dataloader - - -def run_workflow( - global_rank, local_rank, world_size, g, split_idx, num_classes, temp_dir -): - from model import Sage, train_model - - # Below sets gpu_number - dev_id = local_rank - device = torch.device(f"cuda:{dev_id}") - - dataloader = create_dataloader(g, split_idx["train"], device, temp_dir, "train") - print("Dataloader Creation Complete", flush=True) - num_feats = g.ndata["feat"].shape[1] - hid_size = 256 - # Load Training example - model = Sage(num_feats, hid_size, num_classes).to(device) - model = torch.nn.parallel.DistributedDataParallel( - model, - device_ids=[device], - output_device=device, - ) - torch.distributed.barrier() - n_epochs = 10 - total_st = time.time() - opt = torch.optim.Adam(model.parameters(), lr=0.01) - train_model(model, g, opt, dataloader, n_epochs, global_rank, split_idx["valid"]) - torch.distributed.barrier() - total_et = time.time() - print( - f"Total time taken on n_epochs {n_epochs} = {total_et - total_st} s", - f"measured by worker = {global_rank}", - ) - - wm_finalize() - cugraph_comms_shutdown() - - -if __name__ == "__main__": - if "LOCAL_RANK" in os.environ: - 
parser = argparse.ArgumentParser() - parser.add_argument("--dataset_root", type=str, default="dataset") - parser.add_argument("--tempdir_root", type=str, default=None) - parser.add_argument("--dataset", type=str, default="ogbn-products") - parser.add_argument("--skip_partition", action="store_true") - args = parser.parse_args() - - torch.distributed.init_process_group( - "nccl", - timeout=timedelta(minutes=60), - ) - world_size = torch.distributed.get_world_size() - global_rank = torch.distributed.get_rank() - local_rank = int(os.environ["LOCAL_RANK"]) - device = torch.device(local_rank) - - # Create the uid needed for cuGraph comms - if global_rank == 0: - cugraph_id = [cugraph_comms_create_unique_id()] - else: - cugraph_id = [None] - torch.distributed.broadcast_object_list(cugraph_id, src=0, device=device) - cugraph_id = cugraph_id[0] - - init_ddp_worker(global_rank, local_rank, world_size, cugraph_id) - - # Split the data - edge_path = os.path.join(args.dataset_root, args.dataset + "_eix_part") - feature_path = os.path.join(args.dataset_root, args.dataset + "_fea_part") - label_path = os.path.join(args.dataset_root, args.dataset + "_label_part") - meta_path = os.path.join(args.dataset_root, args.dataset + "_meta.json") - - if not args.skip_partition and global_rank == 0: - partition_data( - *load_dgl_dataset(args.dataset_root, args.dataset), - edge_path, - feature_path, - label_path, - meta_path, - ) - torch.distributed.barrier() - - print("loading partitions...") - g, split_idx, num_classes = load_partitioned_data( - rank=global_rank, - edge_path=edge_path, - feature_path=feature_path, - label_path=label_path, - meta_path=meta_path, - ) - print(f"rank {global_rank} has loaded its partition") - torch.distributed.barrier() - - with tempfile.TemporaryDirectory(dir=args.tempdir_root) as directory: - run_workflow( - global_rank, - local_rank, - world_size, - g, - split_idx, - num_classes, - directory, - ) - else: - warnings.warn("This script should be run with 
'torchrun`. Exiting.") diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py deleted file mode 100644 index 001d7fb82dc..00000000000 --- a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import dgl -import torch -import time -import tempfile -import argparse -import os - -import cugraph_dgl - -from cugraph.gnn import ( - cugraph_comms_init, - cugraph_comms_shutdown, - cugraph_comms_create_unique_id, -) - -from pylibwholegraph.torch.initialize import ( - init as wm_init, - finalize as wm_finalize, -) - -# Allow computation on objects that are larger than GPU memory -# https://docs.rapids.ai/api/cudf/stable/developer_guide/library_design/#spilling-to-host-memory -os.environ["CUDF_SPILL"] = "1" - - -def initalize_pytorch_worker(dev_id): - import cupy as cp - import rmm - from rmm.allocators.cupy import rmm_cupy_allocator - - dev = cp.cuda.Device( - dev_id - ) # Create cuda context on the right gpu, defaults to gpu-0 - dev.use() - rmm.reinitialize( - pool_allocator=True, - initial_pool_size=10e9, - maximum_pool_size=15e9, - devices=[dev_id], - ) - - from cugraph.testing.mg_utils import enable_spilling - - enable_spilling() - - torch.cuda.set_device(dev_id) - cp.cuda.set_allocator(rmm_cupy_allocator) - print("device_id", 
dev_id, flush=True) - - -def load_dgl_dataset( - dataset_name="ogbn-products", - dataset_root=None, -): - from ogb.nodeproppred import DglNodePropPredDataset - - dataset = DglNodePropPredDataset(name=dataset_name, root=dataset_root) - split_idx = dataset.get_idx_split() - train_idx, valid_idx, test_idx = ( - split_idx["train"], - split_idx["valid"], - split_idx["test"], - ) - g, label = dataset[0] - g.ndata["label"] = label - if len(g.etypes) <= 1: - g = dgl.add_self_loop(g) - else: - for etype in g.etypes: - if etype[0] == etype[2]: - # only add self loops for src->dst - g = dgl.add_self_loop(g, etype=etype) - - g = g.int() - train_idx = train_idx.int() - valid_idx = valid_idx.int() - test_idx = test_idx.int() - return g, train_idx, valid_idx, test_idx, dataset.num_classes - - -def create_cugraph_graphstore_from_dgl_dataset(dataset, rank, world_size): - (g, train_idx, valid_idx, test_idx, num_classes) = dataset - # Partition the data - cg = cugraph_dgl.Graph( - is_multi_gpu=True, ndata_storage="wholegraph", edata_storage="wholegraph" - ) - - nix = torch.tensor_split(torch.arange(g.num_nodes()), world_size)[rank] - ndata = {k: g.ndata[k][nix].cuda() for k in g.ndata.keys()} - - eix = torch.tensor_split(torch.arange(g.num_edges()), world_size)[rank] - src, dst = g.all_edges(form="uv", order="eid") - edata = {k: g.edata[k][eix].cuda() for k in g.edata.keys()} - - cg.add_nodes(g.num_nodes(), data=ndata) - cg.add_edges( - torch.tensor_split(src, world_size)[rank].cuda(), - torch.tensor_split(dst, world_size)[rank].cuda(), - data=edata, - ) - - return ( - cg, - torch.tensor_split(train_idx, world_size)[rank].to(torch.int64), - torch.tensor_split(valid_idx, world_size)[rank].to(torch.int64), - torch.tensor_split(test_idx, world_size)[rank].to(torch.int64), - num_classes, - ) - - -def create_dataloader(gs, train_idx, device, temp_dir, stage): - import cugraph_dgl - - temp_path = os.path.join(temp_dir, f"{stage}_{device}") - os.mkdir(temp_path) - - sampler = 
cugraph_dgl.dataloading.NeighborSampler( - [10, 20], - directory=temp_path, - batches_per_partition=10, - ) - dataloader = cugraph_dgl.dataloading.FutureDataLoader( - gs, - train_idx, - sampler, - device=device, # Put the sampled MFGs on CPU or GPU - use_ddp=True, # Make it work with distributed data parallel - batch_size=1024, - shuffle=False, # Whether to shuffle the nodes for every epoch - drop_last=False, - num_workers=0, - ) - return dataloader - - -def run_workflow(rank, world_size, cugraph_id, dataset, temp_dir): - from model import Sage, train_model - - # Below sets gpu_number - dev_id = rank - initalize_pytorch_worker(dev_id) - device = torch.device(f"cuda:{dev_id}") - - # Pytorch training worker initialization - dist_init_method = "tcp://{master_ip}:{master_port}".format( - master_ip="127.0.0.1", master_port="12346" - ) - - torch.distributed.init_process_group( - backend="nccl", - init_method=dist_init_method, - world_size=world_size, - rank=rank, - ) - - cugraph_comms_init(rank=rank, world_size=world_size, uid=cugraph_id, device=rank) - wm_init(rank, world_size, rank, world_size) - - print(f"rank {rank}.", flush=True) - print("Initalized across GPUs.") - - ( - gs, - train_idx, - valid_idx, - test_idx, - num_classes, - ) = create_cugraph_graphstore_from_dgl_dataset( - dataset, - rank, - world_size, - ) - del dataset - - torch.distributed.barrier() - print(f"Loading graph to worker {rank} is complete", flush=True) - - dataloader = create_dataloader(gs, train_idx, device, temp_dir, "train") - print("Dataloader Creation Complete", flush=True) - num_feats = gs.ndata["feat"].shape[1] - hid_size = 256 - # Load Training example - model = Sage(num_feats, hid_size, num_classes).to(device) - model = torch.nn.parallel.DistributedDataParallel( - model, - device_ids=[device], - output_device=device, - ) - torch.distributed.barrier() - n_epochs = 10 - total_st = time.time() - opt = torch.optim.Adam(model.parameters(), lr=0.01) - train_model(model, gs, opt, dataloader, 
n_epochs, rank, valid_idx) - torch.distributed.barrier() - total_et = time.time() - print( - f"Total time taken on n_epochs {n_epochs} = {total_et - total_st} s", - f"measured by worker = {rank}", - ) - - torch.cuda.synchronize() - wm_finalize() - cugraph_comms_shutdown() - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--dataset_root", type=str, default="dataset") - parser.add_argument("--tempdir_root", type=str, default=None) - parser.add_argument("--dataset", type=str, default="ogbn-products") - args = parser.parse_args() - - from rmm.allocators.torch import rmm_torch_allocator - - torch.cuda.memory.change_current_allocator(rmm_torch_allocator) - - # Create the uid needed for cuGraph comms - cugraph_id = cugraph_comms_create_unique_id() - - ds = load_dgl_dataset(args.dataset, args.dataset_root) - - world_size = torch.cuda.device_count() - - with tempfile.TemporaryDirectory(dir=args.tempdir_root) as directory: - torch.multiprocessing.spawn( - run_workflow, - args=(world_size, cugraph_id, ds, directory), - nprocs=world_size, - ) diff --git a/python/cugraph-dgl/pyproject.toml b/python/cugraph-dgl/pyproject.toml deleted file mode 100644 index af9e91a988e..00000000000 --- a/python/cugraph-dgl/pyproject.toml +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. - -[build-system] - -requires = [ - "rapids-build-backend>=0.3.1,<0.4.0.dev0", - "setuptools>=61.0.0", - "wheel", -] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
-build-backend = "rapids_build_backend.build" - -[project] -name = "cugraph-dgl" -dynamic = ["version"] -description = "cugraph extensions for DGL" -readme = { file = "README.md", content-type = "text/markdown" } -authors = [ - { name = "NVIDIA Corporation" }, -] -license = { text = "Apache 2.0" } -requires-python = ">=3.10" -classifiers = [ - "Intended Audience :: Developers", - "Programming Language :: Python", -] -dependencies = [ - "cugraph==24.12.*,>=0.0.0a0", - "numba>=0.57", - "numpy>=1.23,<3.0a0", - "pylibcugraphops==24.12.*,>=0.0.0a0", -] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. - -[project.optional-dependencies] -test = [ - "pandas", - "pylibwholegraph==24.12.*,>=0.0.0a0", - "pytest", - "pytest-benchmark", - "pytest-cov", - "pytest-xdist", - "scipy", - "tensordict>=0.1.2", - "torch>=2.3", -] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. - -[project.urls] -Homepage = "https://github.com/rapidsai/cugraph" -Documentation = "https://docs.rapids.ai/api/cugraph/stable/" - -[tool.setuptools] -license-files = ["LICENSE"] - -[tool.setuptools.dynamic] -version = {file = "cugraph_dgl/VERSION"} - -[tool.setuptools.packages.find] -include = [ - "cugraph_dgl*", -] - -[tool.rapids-build-backend] -build-backend = "setuptools.build_meta" -dependencies-file = "../../dependencies.yaml" -matrix-entry = "cuda_suffixed=true" diff --git a/python/cugraph-dgl/tests/test_version.py b/python/cugraph-dgl/tests/test_version.py deleted file mode 100644 index 343e4fb2675..00000000000 --- a/python/cugraph-dgl/tests/test_version.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. 
- -import cugraph_dgl - - -def test_version_constants_are_populated(): - # __git_commit__ will only be non-empty in a built distribution - assert isinstance(cugraph_dgl.__git_commit__, str) - - # __version__ should always be non-empty - assert isinstance(cugraph_dgl.__version__, str) - assert len(cugraph_dgl.__version__) > 0 diff --git a/python/cugraph-pyg/LICENSE b/python/cugraph-pyg/LICENSE deleted file mode 120000 index 30cff7403da..00000000000 --- a/python/cugraph-pyg/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../../LICENSE \ No newline at end of file diff --git a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml deleted file mode 100644 index 5fbd947965f..00000000000 --- a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../../dependencies.yaml and run `rapids-dependency-file-generator`. -channels: -- rapidsai -- rapidsai-nightly -- dask/label/dev -- dglteam/label/th23_cu118 -- conda-forge -- nvidia -dependencies: -- cugraph==24.12.*,>=0.0.0a0 -- pandas -- pre-commit -- pylibcugraphops==24.12.*,>=0.0.0a0 -- pytest -- pytest-benchmark -- pytest-cov -- pytest-xdist -- pytorch-cuda==11.8 -- pytorch>=2.3 -- pytorch_geometric>=2.5,<2.6 -- scipy -- tensordict>=0.1.2 -name: cugraph_pyg_dev_cuda-118 diff --git a/python/cugraph-pyg/cugraph_pyg/VERSION b/python/cugraph-pyg/cugraph_pyg/VERSION deleted file mode 120000 index d62dc733efd..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/VERSION +++ /dev/null @@ -1 +0,0 @@ -../../../VERSION \ No newline at end of file diff --git a/python/cugraph-pyg/cugraph_pyg/__init__.py b/python/cugraph-pyg/cugraph_pyg/__init__.py deleted file mode 100644 index e566e6e9fdd..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from cugraph_pyg._version import __git_commit__, __version__ - -import cugraph_pyg.data -import cugraph_pyg.loader -import cugraph_pyg.sampler -import cugraph_pyg.nn diff --git a/python/cugraph-pyg/cugraph_pyg/_version.py b/python/cugraph-pyg/cugraph_pyg/_version.py deleted file mode 100644 index 053b163116d..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/_version.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import importlib.resources - -# Read VERSION file from the module that is symlinked to VERSION file -# in the root of the repo at build time or copied to the moudle at -# installation. VERSION is a separate file that allows CI build-time scripts -# to update version info (including commit hashes) without modifying -# source files. 
-__version__ = ( - importlib.resources.files(__package__).joinpath("VERSION").read_text().strip() -) -try: - __git_commit__ = ( - importlib.resources.files(__package__) - .joinpath("GIT_COMMIT") - .read_text() - .strip() - ) -except FileNotFoundError: - __git_commit__ = "" - -__all__ = ["__git_commit__", "__version__"] diff --git a/python/cugraph-pyg/cugraph_pyg/data/__init__.py b/python/cugraph-pyg/cugraph_pyg/data/__init__.py deleted file mode 100644 index 6d51fd5ea01..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/data/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -from cugraph_pyg.data.dask_graph_store import DaskGraphStore -from cugraph_pyg.data.graph_store import GraphStore -from cugraph_pyg.data.feature_store import ( - TensorDictFeatureStore, - WholeFeatureStore, -) - - -def CuGraphStore(*args, **kwargs): - warnings.warn("CuGraphStore has been renamed to DaskGraphStore", FutureWarning) - return DaskGraphStore(*args, **kwargs) diff --git a/python/cugraph-pyg/cugraph_pyg/data/dask_graph_store.py b/python/cugraph-pyg/cugraph_pyg/data/dask_graph_store.py deleted file mode 100644 index 6195f3118a4..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/data/dask_graph_store.py +++ /dev/null @@ -1,1321 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple, Any, Union, List, Dict - -from enum import Enum, auto - -from dataclasses import dataclass -from collections import defaultdict -from itertools import chain -from functools import cached_property - -import numpy as np -import cupy -import pandas -import cudf -import cugraph -import warnings - -import dask.array as dar -import dask.dataframe as dd -import dask.distributed as distributed -import dask_cudf - -from cugraph.utilities.utils import import_optional, MissingModule - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - -Tensor = None if isinstance(torch, MissingModule) else torch.Tensor -NdArray = None if isinstance(cupy, MissingModule) else cupy.ndarray -DaskCudfSeries = None if isinstance(dask_cudf, MissingModule) else dask_cudf.Series - -TensorType = Union[Tensor, NdArray, cudf.Series, DaskCudfSeries] -NodeType = ( - None - if isinstance(torch_geometric, MissingModule) - else torch_geometric.typing.NodeType -) -EdgeType = ( - None - if isinstance(torch_geometric, MissingModule) - else torch_geometric.typing.EdgeType -) - - -class EdgeLayout(Enum): - COO = "coo" - CSC = "csc" - CSR = "csr" - - -@dataclass -class CuGraphEdgeAttr: - """ - Defines the attributes of an :obj:`GraphStore` edge. 
- """ - - # The type of the edge - edge_type: Optional[Any] - - # The layout of the edge representation - layout: EdgeLayout - - # Whether the edge index is sorted, by destination node. Useful for - # avoiding sorting costs when performing neighbor sampling, and only - # meaningful for COO (CSC and CSR are sorted by definition) - is_sorted: bool = False - - # The number of nodes in this edge type. If set to None, will attempt to - # infer with the simple heuristic int(self.edge_index.max()) + 1 - size: Optional[Tuple[int, int]] = None - - # NOTE we define __post_init__ to force-cast layout - def __post_init__(self): - self.layout = EdgeLayout(self.layout) - - @classmethod - def cast(cls, *args, **kwargs): - """ - Cast to a CuGraphTensorAttr from a tuple, list, or dict. - - Returns - ------- - CuGraphTensorAttr - contains the data of the tuple, list, or dict passed in - """ - if len(args) == 1 and len(kwargs) == 0: - elem = args[0] - if elem is None: - return None - if isinstance(elem, CuGraphEdgeAttr): - return elem - if isinstance(elem, (tuple, list)): - return cls(*elem) - if isinstance(elem, dict): - return cls(**elem) - return cls(*args, **kwargs) - - -class _field_status(Enum): - UNSET = auto() - - -@dataclass -class CuGraphTensorAttr: - """ - Defines the attributes of a class:`FeatureStore` tensor; in particular, - all the parameters necessary to uniquely identify a tensor from the feature - store. - - Note that the order of the attributes is important; this is the order in - which attributes must be provided for indexing calls. Feature store - implementor classes can define a different ordering by overriding - :meth:`TensorAttr.__init__`. - """ - - # The group name that the tensor corresponds to. Defaults to UNSET. - group_name: Optional[str] = _field_status.UNSET - - # The name of the tensor within its group. Defaults to UNSET. - attr_name: Optional[str] = _field_status.UNSET - - # The node indices the rows of the tensor correspond to. Defaults to UNSET. 
- index: Optional[Any] = _field_status.UNSET - - # The properties in the FeatureStore the rows of the tensor correspond to. - # Defaults to UNSET. - properties: Optional[Any] = _field_status.UNSET - - # The datatype of the tensor. Defaults to UNSET. - dtype: Optional[Any] = _field_status.UNSET - - # Convenience methods - - def is_set(self, key): - """ - Whether an attribute is set in :obj:`TensorAttr`. - """ - if key not in self.__dataclass_fields__: - raise KeyError(key) - attr = getattr(self, key) - return type(attr) is not _field_status or attr != _field_status.UNSET - - def is_fully_specified(self): - """ - Whether the :obj:`TensorAttr` has no unset fields. - """ - return all([self.is_set(key) for key in self.__dataclass_fields__]) - - def fully_specify(self): - """ - Sets all :obj:`UNSET` fields to :obj:`None`. - """ - for key in self.__dataclass_fields__: - if not self.is_set(key): - setattr(self, key, None) - return self - - def update(self, attr): - """ - Updates an :class:`TensorAttr` with set attributes from another - :class:`TensorAttr`. - """ - for key in self.__dataclass_fields__: - if attr.is_set(key): - setattr(self, key, getattr(attr, key)) - - @classmethod - def cast(cls, *args, **kwargs): - """ - Casts to a CuGraphTensorAttr from a tuple, list, or dict - - Returns - ------- - CuGraphTensorAttr - contains the data of the tuple, list, or dict passed in - """ - if len(args) == 1 and len(kwargs) == 0: - elem = args[0] - if elem is None: - return None - if isinstance(elem, CuGraphTensorAttr): - return elem - if isinstance(elem, (tuple, list)): - return cls(*elem) - if isinstance(elem, dict): - return cls(**elem) - return cls(*args, **kwargs) - - -class DaskGraphStore: - """ - Duck-typed version of PyG's GraphStore and FeatureStore that uses - Dask to distribute the graph structure across GPUs and a - cugraph.gnn.FeatureStore to store node/edge features. Supports - single-node/single-GPU, single-node/multi-GPU, and multi-node/multi-GPU - configurations. 
Supports both homogeneous and heterogeneous graphs. - """ - - # TODO allow (and possibly require) separate stores for node, edge attrs - # For now edge attrs are entirely unsupported. - # TODO add an "expensive check" argument that ensures the graph store - # and feature store are valid and compatible with PyG. - def __init__( - self, - F: cugraph.gnn.FeatureStore, - G: Union[ - Dict[Tuple[str, str, str], Tuple[TensorType]], - Dict[Tuple[str, str, str], int], - ], - num_nodes_dict: Dict[str, int], - *, - multi_gpu: bool = False, - order: str = "CSR", - ): - """ - Constructs a new DaskGraphStore from the provided - arguments. - - Parameters - ---------- - F: cugraph.gnn.FeatureStore (Required) - The feature store containing this graph's features. - Typed lexicographic-ordered numbering convention - should match that of the graph. - - G: dict[str, tuple[TensorType]] or dict[str, int] (Required) - Dictionary of edge indices. - Option 1 (graph in memory): - - Pass the edge indices: i.e. - { - ('author', 'writes', 'paper'): [[0,1,2],[2,0,1]], - ('author', 'affiliated', 'institution'): [[0,1],[0,1]] - } - - - Option 2 (graph not in memory): - - Pass the number of edges: i.e. - { - ('author', 'writes', 'paper'): 2, - ('author', 'affiliated', 'institution'): 2 - } - If the graph is not in memory, manipulating the edge indices - or calling sampling is not possible. This is for cases where - sampling has already been done and samples were written to disk. - - Note: the internal cugraph representation will use - offsetted vertex and edge ids. - - num_nodes_dict: dict (Required) - A dictionary mapping each node type to the count of nodes - of that type in the graph. - - multi_gpu: bool (Optional, default = False) - Whether the store should be backed by a multi-GPU graph. - Requires dask to have been set up. - - order: str (Optional ["CSR", "CSC"], default = CSR) - The order to use for sampling. CSR corresponds to the - standard OGB dataset order that is usually used in PyG. 
- CSC order constructs the same graph as CSR, but with - edges in the opposite direction. - """ - - if None in G: - raise ValueError("Unspecified edge types not allowed in PyG") - - if order != "CSR" and order != "CSC": - raise ValueError("invalid valid for order") - - self.__vertex_dtype = torch.int64 - - self._tensor_attr_cls = CuGraphTensorAttr - self._tensor_attr_dict = defaultdict(list) - - construct_graph = True - if isinstance(next(iter(G.values())), int): - # User has passed in the number of edges - # (not the actual edge index), so the number of edges - # does not need to be counted. - num_edges_dict = dict(G) # make sure the cugraph store owns this dict - construct_graph = False - else: - # User has passed in the actual edge index, so the - # number of edges needs to be counted. - num_edges_dict = { - pyg_can_edge_type: len(ei[0]) for pyg_can_edge_type, ei in G.items() - } - - self.__infer_offsets(num_nodes_dict, num_edges_dict) - self.__infer_existing_tensors(F) - self.__infer_edge_types(num_nodes_dict, num_edges_dict) - - self._edge_attr_cls = CuGraphEdgeAttr - - self.__features = F - self.__graph = None - self.__is_graph_owner = False - self.__order = order - - if construct_graph: - if multi_gpu: - self.__graph = distributed.get_client().get_dataset( - "cugraph_graph", default=None - ) - - if self.__graph is None: - self.__graph = self.__construct_graph( - G, multi_gpu=multi_gpu, order=order - ) - self.__is_graph_owner = True - - self.__subgraphs = {} - - def __del__(self): - if self.__is_graph_owner: - if isinstance(self.__graph._plc_graph, dict): - try: - distributed.get_client().unpublish_dataset("cugraph_graph") - except TypeError: - warnings.warn( - "Could not unpublish graph dataset, most likely because" - " dask has already shut down." 
- ) - del self.__graph - - def __make_offsets(self, input_dict): - offsets = {} - offsets["stop"] = [input_dict[v] for v in sorted(input_dict.keys())] - offsets["stop"] = torch.tensor(offsets["stop"]).cuda() - - cumsum = offsets["stop"].cumsum(0) - offsets["start"] = cumsum - offsets["stop"] - offsets["stop"] = cumsum - 1 - - offsets["type"] = np.array(sorted(input_dict.keys())) - - return offsets - - def __infer_offsets( - self, - num_nodes_dict: Dict[str, int], - num_edges_dict: Dict[Tuple[str, str, str], int], - ) -> None: - """ - Sets the vertex offsets for this store. - """ - self.__vertex_type_offsets = self.__make_offsets(num_nodes_dict) - - # Need to convert tuples to string in order to use searchsorted - # Can convert back using x.split('__') - # Lexicographic ordering is unchanged. - self.__edge_type_offsets = self.__make_offsets( - { - "__".join(pyg_can_edge_type): n - for pyg_can_edge_type, n in num_edges_dict.items() - } - ) - - def __dask_array_from_numpy(self, array: np.ndarray, npartitions: int): - return dar.from_array( - array, - meta=np.array([], dtype=array.dtype), - chunks=max(1, len(array) // npartitions), - ) - - def __construct_graph( - self, - edge_info: Dict[Tuple[str, str, str], List[TensorType]], - multi_gpu: bool = False, - order: str = "CSC", - ) -> cugraph.MultiGraph: - """ - This function takes edge information and uses it to construct - a cugraph Graph. It determines the numerical edge type by - sorting the keys of the input dictionary - (the canonical edge types). - - Parameters - ---------- - edge_info: Dict[Tuple[str, str, str], List[TensorType]] (Required) - Input edge info dictionary, where keys are the canonical - edge type and values are the edge index (src/dst). - - multi_gpu: bool (Optional, default=False) - Whether to construct a single-GPU or multi-GPU cugraph Graph. - Defaults to a single-GPU graph. 
- - order: str (CSC or CSR) - Essentially whether to reverse edges so that the cuGraph - sampling algorithm operates on the CSC matrix instead of - the CSR matrix. Should nearly always be CSC unless there - is a specific expectation of reverse sampling, or correctness - testing is being performed. - - Returns - ------- - A newly-constructed directed cugraph.MultiGraph object. - """ - - # Ensure the original dict is not modified. - edge_info_cg = {} - - if order != "CSR" and order != "CSC": - raise ValueError("Order must be either CSC (default) or CSR!") - - # Iterate over the keys in sorted order so that the created - # numerical types correspond to the lexicographic order - # of the keys, which is critical to converting the numeric - # keys back to canonical edge types later. - # FIXME don't always convert to host arrays (#3383) - for pyg_can_edge_type in sorted(edge_info.keys()): - src_type, _, dst_type = pyg_can_edge_type - srcs, dsts = edge_info[pyg_can_edge_type] - - src_offset = np.searchsorted(self.__vertex_type_offsets["type"], src_type) - srcs_t = srcs + int(self.__vertex_type_offsets["start"][src_offset]) - if isinstance(srcs_t, torch.Tensor): - srcs_t = srcs_t.cpu() - else: - if isinstance(srcs_t, dask_cudf.Series): - srcs_t = srcs_t.compute() - if isinstance(srcs_t, cudf.Series): - srcs_t = srcs_t.values_host - - dst_offset = np.searchsorted(self.__vertex_type_offsets["type"], dst_type) - dsts_t = dsts + int(self.__vertex_type_offsets["start"][dst_offset]) - if isinstance(dsts_t, torch.Tensor): - dsts_t = dsts_t.cpu() - else: - if isinstance(dsts_t, dask_cudf.Series): - dsts_t = dsts_t.compute() - if isinstance(dsts_t, cudf.Series): - dsts_t = dsts_t.values_host - - edge_info_cg[pyg_can_edge_type] = (srcs_t, dsts_t) - - na_src = np.concatenate( - [ - edge_info_cg[pyg_can_edge_type][0] - for pyg_can_edge_type in sorted(edge_info_cg.keys()) - ] - ) - - na_dst = np.concatenate( - [ - edge_info_cg[pyg_can_edge_type][1] - for pyg_can_edge_type in 
sorted(edge_info_cg.keys()) - ] - ) - - et_offsets = self.__edge_type_offsets - na_etp = np.concatenate( - [ - np.full( - int(et_offsets["stop"][i] - et_offsets["start"][i] + 1), - i, - dtype="int32", - ) - for i in range(len(self.__edge_type_offsets["start"])) - ] - ) - - vertex_dtype = na_src.dtype - - if multi_gpu: - client = distributed.get_client() - nworkers = len(client.scheduler_info()["workers"]) - npartitions = nworkers * 4 - - src_dar = self.__dask_array_from_numpy(na_src, npartitions) - del na_src - - dst_dar = self.__dask_array_from_numpy(na_dst, npartitions) - del na_dst - - etp_dar = self.__dask_array_from_numpy(na_etp, npartitions) - del na_etp - - df = dd.from_dask_array(etp_dar, columns=["etp"]) - df["src"] = dst_dar if order == "CSC" else src_dar - df["dst"] = src_dar if order == "CSC" else dst_dar - - del src_dar - del dst_dar - del etp_dar - - if df.etp.dtype != "int32": - raise ValueError("Edge type must be int32!") - - # Ensure the dataframe is constructed on each partition - # instead of adding additional synchronization head from potential - # host to device copies. 
- def get_empty_df(): - return cudf.DataFrame( - { - "etp": cudf.Series([], dtype="int32"), - "src": cudf.Series([], dtype=vertex_dtype), - "dst": cudf.Series([], dtype=vertex_dtype), - } - ) - - # Have to check for empty partitions and handle them appropriately - df = df.persist() - df = df.map_partitions( - lambda f: cudf.DataFrame.from_pandas(f) - if len(f) > 0 - else get_empty_df(), - meta=get_empty_df(), - ).reset_index( - drop=True - ) # should be ok for dask - else: - df = pandas.DataFrame( - { - "src": pandas.Series(na_dst) - if order == "CSC" - else pandas.Series(na_src), - "dst": pandas.Series(na_src) - if order == "CSC" - else pandas.Series(na_dst), - "etp": pandas.Series(na_etp), - } - ) - df = cudf.from_pandas(df) - df.reset_index(drop=True, inplace=True) - - graph = cugraph.MultiGraph(directed=True) - if multi_gpu: - graph.from_dask_cudf_edgelist( - df, - source="src", - destination="dst", - edge_type="etp", - ) - distributed.get_client().publish_dataset(cugraph_graph=graph) - else: - graph.from_cudf_edgelist( - df, - source="src", - destination="dst", - edge_type="etp", - ) - - del df - return graph - - @property - def _edge_types_to_attrs(self) -> dict: - return dict(self.__edge_types_to_attrs) - - @property - def order(self) -> str: - return self.__order - - @property - def node_types(self) -> List[NodeType]: - return list(self.__vertex_type_offsets["type"]) - - @property - def edge_types(self) -> List[EdgeType]: - return list(self.__edge_types_to_attrs.keys()) - - def canonical_edge_type_to_numeric(self, etype: EdgeType) -> int: - return np.searchsorted(self.__edge_type_offsets["type"], "__".join(etype)) - - def numeric_edge_type_to_canonical(self, etype: int) -> EdgeType: - return tuple(self.__edge_type_offsets["type"][etype].split("__")) - - @cached_property - def _is_delayed(self): - if self.__graph is None: - return False - return self.__graph.is_multi_gpu() - - def _numeric_vertex_type_from_name(self, vertex_type_name: str) -> int: - return 
np.searchsorted(self.__vertex_type_offsets["type"], vertex_type_name) - - def get_vertex_index(self, vtypes) -> TensorType: - if isinstance(vtypes, str): - vtypes = [vtypes] - - ix = torch.tensor([], dtype=torch.int64) - - if isinstance(self.__vertex_type_offsets, dict): - vtypes = np.searchsorted(self.__vertex_type_offsets["type"], vtypes) - for vtype in vtypes: - start = int(self.__vertex_type_offsets["start"][vtype]) - stop = int(self.__vertex_type_offsets["stop"][vtype]) - ix = torch.concatenate( - [ - ix, - torch.arange( - start, stop + 1, 1, dtype=self.__vertex_dtype, device="cuda" - ), - ] - ) - - return ix - - def put_edge_index(self, edge_index, edge_attr): - """ - Adds additional edges to the graph. - Not yet implemented. - """ - raise NotImplementedError("Adding indices not supported.") - - def get_all_edge_attrs(self): - """ - Gets a list of all edge types and indices in this store. - - Returns - ------- - list[str] - All edge types and indices in this store. - """ - return self.__edge_types_to_attrs.values() - - def _get_edge_index(self, attr: CuGraphEdgeAttr) -> Tuple[TensorType, TensorType]: - """ - Returns the edge index in the requested format - (as defined by attr). Currently, only unsorted - COO is supported, which is returned as a (src,dst) - tuple as expected by the PyG API. - - Parameters - ---------- - attr: CuGraphEdgeAttr - The CuGraphEdgeAttr specifying the - desired edge type, layout (i.e. CSR, COO, CSC), and - whether the returned index should be sorted (if COO). - Currently, only unsorted COO is supported. - - Returns - ------- - (src, dst) : Tuple[tensor type] - Tuple of the requested edge index in COO form. - Currently, only COO form is supported. 
- """ - - if self.__graph is None: - raise ValueError("Graph is not in memory, cannot access edge index!") - - if attr.layout != EdgeLayout.COO: - # TODO support returning CSR/CSC (Issue #3802) - raise TypeError("Only COO direct access is supported!") - - # Currently, graph creation enforces that input vertex ids are always of - # integer type. Therefore, it is currently safe to assume that for MG - # graphs, the src/dst col names are renumbered_src/dst - # and for SG graphs, the src/dst col names are src/dst. - # This may change in the future if/when renumbering or the graph - # creation process is refactored. - # See Issue #3201 for more details. - # Also note src/dst are flipped so that cuGraph sampling is done in - # CSC format rather than CSR format. - if self._is_delayed: - dst_col_name = self.__graph.renumber_map.renumbered_src_col_name - src_col_name = self.__graph.renumber_map.renumbered_dst_col_name - else: - dst_col_name = self.__graph.srcCol - src_col_name = self.__graph.dstCol - - # If there is only one edge type (homogeneous graph) then - # bypass the edge filters for a significant speed improvement. - if len(self.__edge_types_to_attrs) == 1: - if attr.edge_type not in self.__edge_types_to_attrs: - raise ValueError( - f"Requested edge type {attr.edge_type}" "is not present in graph." 
- ) - - df = self.__graph.edgelist.edgelist_df[[src_col_name, dst_col_name]] - src_offset = 0 - dst_offset = 0 - else: - src_type, _, dst_type = attr.edge_type - src_offset = int( - self.__vertex_type_offsets["start"][ - self._numeric_vertex_type_from_name(src_type) - ] - ) - dst_offset = int( - self.__vertex_type_offsets["start"][ - self._numeric_vertex_type_from_name(dst_type) - ] - ) - coli = np.searchsorted( - self.__edge_type_offsets["type"], "__".join(attr.edge_type) - ) - - df = self.__graph.edgelist.edgelist_df[ - [src_col_name, dst_col_name, self.__graph.edgeTypeCol] - ] - df = df[df[self.__graph.edgeTypeCol] == coli] - df = df[[src_col_name, dst_col_name]] - - if self._is_delayed: - df = df.compute() - - src = torch.as_tensor(df[src_col_name], device="cuda") - src_offset - dst = torch.as_tensor(df[dst_col_name], device="cuda") - dst_offset - - src = src.to(self.__vertex_dtype) - dst = dst.to(self.__vertex_dtype) - - if src.shape[0] != dst.shape[0]: - raise IndexError("src and dst shape do not match!") - - return (src, dst) - - def get_edge_index(self, *args, **kwargs) -> Tuple[TensorType, TensorType]: - """ - Synchronously gets an edge_index tensor from the materialized - graph. - - Args: - **attr(EdgeAttr): the edge attributes. - - Returns: - EdgeTensorType: an edge_index tensor corresonding to the provided - attributes, or None if there is no such tensor. - - Raises: - KeyError: if the edge index corresponding to attr was not found. - """ - - edge_attr = self._edge_attr_cls.cast(*args, **kwargs) - edge_attr.layout = EdgeLayout(edge_attr.layout) - # Override is_sorted for CSC and CSR: - # TODO treat is_sorted specially in this function, where is_sorted=True - # returns an edge index sorted by column. 
- edge_attr.is_sorted = edge_attr.is_sorted or ( - edge_attr.layout in [EdgeLayout.CSC, EdgeLayout.CSR] - ) - edge_index = self._get_edge_index(edge_attr) - if edge_index is None: - raise KeyError(f"An edge corresponding to '{edge_attr}' was not " f"found") - return edge_index - - def _subgraph(self, edge_types: List[tuple] = None) -> cugraph.MultiGraph: - """ - Returns a subgraph with edges limited to those of a given type - - Parameters - ---------- - edge_types : list of pyg canonical edge types - Directly references the graph's internal edge types. Does - not accept PyG edge type tuples. - - Returns - ------- - The appropriate extracted subgraph. Will extract the subgraph - if it has not already been extracted. - - """ - if self.__graph is None: - raise ValueError("Graph is not in memory, cannot get subgraph") - - if edge_types is not None and set(edge_types) != set( - self.__edge_types_to_attrs.keys() - ): - raise ValueError( - "Subgraphing is currently unsupported, please" - " specify all edge types in the graph or leave" - " this argument empty." - ) - - return self.__graph - - def _get_vertex_groups_from_sample( - self, nodes_of_interest: TensorType, is_sorted: bool = False - ) -> Dict[str, torch.Tensor]: - """ - Given a tensor of nodes of interest, this - method a single dictionary, noi_index. - - noi_index is the original vertex ids grouped by vertex type. 
- - Example Input: [5, 2, 1, 10, 11, 8] - Output: {'red_vertex': [5, 1, 8], 'blue_vertex': [2], 'green_vertex': [10, 11]} - - """ - - noi_index = {} - - vtypes = cudf.Series(self.__vertex_type_offsets["type"]) - if len(vtypes) == 1: - noi_index[vtypes.iloc[0]] = nodes_of_interest - else: - noi_type_indices = torch.searchsorted( - torch.as_tensor(self.__vertex_type_offsets["stop"], device="cuda"), - nodes_of_interest, - ) - - noi_types = vtypes.iloc[cupy.asarray(noi_type_indices)].reset_index( - drop=True - ) - noi_starts = self.__vertex_type_offsets["start"][noi_type_indices] - - noi_types = cudf.Series(noi_types, name="t").groupby("t").groups - - for type_name, ix in noi_types.items(): - # store the renumbering for this vertex type - # renumbered vertex id is the index of the old id - ix = torch.as_tensor(ix, device="cuda") - # subtract off the offsets - noi_index[type_name] = nodes_of_interest[ix] - noi_starts[ix] - - return noi_index - - def _get_sample_from_vertex_groups( - self, vertex_groups: Dict[str, TensorType] - ) -> TensorType: - """ - Inverse of _get_vertex_groups_from_sample() (although with de-offsetted ids). - Given a dictionary of node types and de-offsetted node ids, return - the global (non-renumbered) vertex ids. 
- - Example Input: {'horse': [1, 3, 5], 'duck': [1, 2]} - Output: [1, 3, 5, 14, 15] - """ - t = torch.tensor([], dtype=torch.int64, device="cuda") - - for group_name, ix in vertex_groups.items(): - type_id = self._numeric_vertex_type_from_name(group_name) - if not ix.is_cuda: - ix = ix.cuda() - offset = self.__vertex_type_offsets["start"][type_id] - u = ix + offset - t = torch.concatenate([t, u]) - - return t - - def _get_renumbered_edge_groups_from_sample( - self, sampling_results: cudf.DataFrame, noi_index: dict - ) -> Tuple[ - Dict[Tuple[str, str, str], torch.Tensor], - Tuple[Dict[Tuple[str, str, str], torch.Tensor]], - ]: - """ - Given a cudf (NOT dask_cudf) DataFrame of sampling results and a dictionary - of non-renumbered vertex ids grouped by vertex type, this method - outputs two dictionaries: - 1. row_dict - 2. col_dict - (1) row_dict corresponds to the renumbered source vertex ids grouped - by PyG edge type - (src, type, dst) tuple. - (2) col_dict corresponds to the renumbered destination vertex ids grouped - by PyG edge type (src, type, dst) tuple. - * The two outputs combined make a PyG "edge index". - * The ith element of each array corresponds to the same edge. - * The _get_vertex_groups_from_sample() method is usually called - before this one to get the noi_index. 
- - Example Input: Series({ - 'majors': [0, 5, 11, 3], - 'minors': [8, 2, 3, 5]}, - 'edge_type': [1, 3, 5, 14] - }), - { - 'blue_vertex': [0, 5], - 'red_vertex': [3, 11], - 'green_vertex': [2, 8] - } - Output: { - ('blue', 'etype1', 'green'): [0, 1], - ('red', 'etype2', 'red'): [1], - ('red', 'etype3', 'blue'): [0] - }, - { - ('blue', 'etype1', 'green'): [1, 0], - ('red', 'etype2', 'red'): [0], - ('red', 'etype3', 'blue'): [1] - } - - """ - row_dict = {} - col_dict = {} - # If there is only 1 edge type (includes heterogeneous graphs) - if len(self.edge_types) == 1: - t_pyg_type = list(self.__edge_types_to_attrs.values())[0].edge_type - src_type, _, dst_type = t_pyg_type - - # If there is only 1 node type (homogeneous) - # This should only occur if the cuGraph loader was - # not used. This logic is deprecated. - if len(self.node_types) == 1: - warnings.warn( - "Renumbering after sampling for homogeneous graphs is deprecated.", - FutureWarning, - ) - - # Create a dataframe mapping old ids to new ids. 
- vtype = src_type - id_table = noi_index[vtype] - id_map = cudf.Series( - cupy.arange(id_table.shape[0], dtype="int32"), - name="new_id", - index=cupy.asarray(id_table), - ).sort_index() - - # Renumber the majors using binary search - # Step 1: get the index of the new id - ix_r = torch.searchsorted( - torch.as_tensor(id_map.index.values, device="cuda"), - torch.as_tensor(sampling_results.majors.values, device="cuda"), - ) - # Step 2: Go from id indices to actual ids - row_dict[t_pyg_type] = torch.as_tensor(id_map.values, device="cuda")[ - ix_r - ] - - # Renumber the minors using binary search - # Step 1: get the index of the new id - ix_c = torch.searchsorted( - torch.as_tensor(id_map.index.values, device="cuda"), - torch.as_tensor(sampling_results.minors.values, device="cuda"), - ) - # Step 2: Go from id indices to actual ids - col_dict[t_pyg_type] = torch.as_tensor(id_map.values, device="cuda")[ - ix_c - ] - else: - # Handle the heterogeneous case where there is only 1 edge type - dst_id_table = noi_index[dst_type] - dst_id_map = cudf.DataFrame( - { - "dst": cupy.asarray(dst_id_table), - "new_id": cupy.arange(dst_id_table.shape[0]), - } - ).set_index("dst") - dst = dst_id_map["new_id"].loc[sampling_results.minors] - col_dict[t_pyg_type] = torch.as_tensor(dst.values, device="cuda") - - src_id_table = noi_index[src_type] - src_id_map = cudf.DataFrame( - { - "src": cupy.asarray(src_id_table), - "new_id": cupy.arange(src_id_table.shape[0]), - } - ).set_index("src") - src = src_id_map["new_id"].loc[sampling_results.majors] - row_dict[t_pyg_type] = torch.as_tensor(src.values, device="cuda") - - else: - # This will retrieve the single string representation. - # It needs to be converted to a tuple in the for loop below. 
- eoi_types = ( - cudf.Series(self.__edge_type_offsets["type"]) - .iloc[sampling_results.edge_type.astype("int32")] - .reset_index(drop=True) - ) - - eoi_types = cudf.Series(eoi_types, name="t").groupby("t").groups - - for pyg_can_edge_type_str, ix in eoi_types.items(): - pyg_can_edge_type = tuple(pyg_can_edge_type_str.split("__")) - - if self.__order == "CSR": - src_type, _, dst_type = pyg_can_edge_type - else: # CSC - dst_type, _, src_type = pyg_can_edge_type - - # Get the de-offsetted minors - dst_num_type = self._numeric_vertex_type_from_name(dst_type) - minors = torch.as_tensor( - sampling_results.minors.iloc[ix].values, device="cuda" - ) - minors -= self.__vertex_type_offsets["start"][dst_num_type] - - # Create the col entry for this type - dst_id_table = noi_index[dst_type] - dst_id_map = ( - cudf.Series(cupy.asarray(dst_id_table), name="dst") - .reset_index() - .rename(columns={"index": "new_id"}) - .set_index("dst") - ) - dst = dst_id_map["new_id"].loc[cupy.asarray(minors)] - col_dict[pyg_can_edge_type] = torch.as_tensor(dst.values, device="cuda") - - # Get the de-offsetted majors - src_num_type = self._numeric_vertex_type_from_name(src_type) - majors = torch.as_tensor( - sampling_results.majors.iloc[ix].values, device="cuda" - ) - majors -= self.__vertex_type_offsets["start"][src_num_type] - - # Create the row entry for this type - src_id_table = noi_index[src_type] - src_id_map = ( - cudf.Series(cupy.asarray(src_id_table), name="src") - .reset_index() - .rename(columns={"index": "new_id"}) - .set_index("src") - ) - src = src_id_map["new_id"].loc[cupy.asarray(majors)] - row_dict[pyg_can_edge_type] = torch.as_tensor(src.values, device="cuda") - - return row_dict, col_dict - - def put_tensor(self, tensor, attr) -> None: - raise NotImplementedError("Adding properties not supported.") - - def create_named_tensor( - self, attr_name: str, properties: List[str], vertex_type: str, dtype: str - ) -> None: - """ - Create a named tensor that contains a subset of - 
properties in the graph. - - Parameters - ---------- - attr_name : str - The name of the tensor within its group. - properties : list[str] - The properties the rows - of the tensor correspond to. - vertex_type : str - The vertex type associated with this new tensor property. - dtype : numpy/cupy dtype (i.e. 'int32') or torch dtype (i.e. torch.float) - The datatype of the tensor. Usually float32/float64. - """ - self._tensor_attr_dict[vertex_type].append( - CuGraphTensorAttr( - vertex_type, attr_name, properties=properties, dtype=dtype - ) - ) - - def __infer_edge_types( - self, - num_nodes_dict: Dict[str, int], - num_edges_dict: Dict[Tuple[str, str, str], int], - ) -> None: - self.__edge_types_to_attrs = {} - - for pyg_can_edge_type in sorted(num_edges_dict.keys()): - sz_src = num_nodes_dict[pyg_can_edge_type[0]] - sz_dst = num_nodes_dict[pyg_can_edge_type[-1]] - self.__edge_types_to_attrs[pyg_can_edge_type] = CuGraphEdgeAttr( - edge_type=pyg_can_edge_type, - layout=EdgeLayout.COO, - is_sorted=False, - size=(sz_src, sz_dst), - ) - - def __infer_existing_tensors(self, F) -> None: - """ - Infers the tensor attributes/features. - """ - for attr_name, types_with_attr in F.get_feature_list().items(): - for vt in types_with_attr: - attr_dtype = F.get_data(np.array([0]), vt, attr_name).dtype - self.create_named_tensor( - attr_name=attr_name, - properties=None, - vertex_type=vt, - dtype=attr_dtype, - ) - - def get_all_tensor_attrs(self) -> List[CuGraphTensorAttr]: - """ - Obtains all tensor attributes stored in this feature store. 
- """ - # unpack and return the list of lists - it = chain.from_iterable(self._tensor_attr_dict.values()) - return [CuGraphTensorAttr.cast(c) for c in it] - - def _get_tensor(self, attr: CuGraphTensorAttr) -> TensorType: - feature_backend = self.__features.backend - cols = attr.properties - - idx = attr.index - if idx is not None: - if feature_backend in ["torch", "wholegraph"]: - if not isinstance(idx, torch.Tensor): - raise TypeError( - f"Type {type(idx)} invalid" - f" for feature store backend {feature_backend}" - ) - elif feature_backend == "numpy": - # allow feature indexing through cupy arrays - if isinstance(idx, cupy.ndarray): - idx = idx.get() - elif isinstance(idx, torch.Tensor): - idx = np.asarray(idx.cpu()) - - if cols is None: - t = self.__features.get_data(idx, attr.group_name, attr.attr_name) - if idx is None: - t = t[-1] - - if isinstance(t, np.ndarray): - t = torch.as_tensor(t, device="cpu") - - return t - - else: - t = self.__features.get_data(idx, attr.group_name, cols[0]) - - if len(t.shape) == 1: - t = torch.tensor([t]) - - for col in cols[1:]: - u = self.__features.get_data(idx, attr.group_name, col) - - if len(u.shape) == 1: - u = torch.tensor([u]) - - t = torch.concatenate([t, u]) - - return t - - def _multi_get_tensor(self, attrs: List[CuGraphTensorAttr]) -> List[TensorType]: - return [self._get_tensor(attr) for attr in attrs] - - def multi_get_tensor(self, attrs: List[CuGraphTensorAttr]) -> List[TensorType]: - """ - Synchronously obtains a :class:`FeatureTensorType` object from the - feature store for each tensor associated with the attributes in - `attrs`. - - Parameters - ---------- - attrs (List[TensorAttr]): a list of :class:`TensorAttr` attributes - that identify the tensors to get. - - Returns - ------- - List[FeatureTensorType]: a Tensor of the same type as the index for - each attribute. - - Raises - ------ - KeyError: if a tensor corresponding to an attr was not found. 
- ValueError: if any input `TensorAttr` is not fully specified. - """ - attrs = [ - self._infer_unspecified_attr(self._tensor_attr_cls.cast(attr)) - for attr in attrs - ] - bad_attrs = [attr for attr in attrs if not attr.is_fully_specified()] - if len(bad_attrs) > 0: - raise ValueError( - f"The input TensorAttr(s) '{bad_attrs}' are not fully " - f"specified. Please fully specify them by specifying all " - f"'UNSET' fields" - ) - - tensors = self._multi_get_tensor(attrs) - - bad_attrs = [attrs[i] for i, v in enumerate(tensors) if v is None] - if len(bad_attrs) > 0: - raise KeyError( - f"Tensors corresponding to attributes " f"'{bad_attrs}' were not found" - ) - - return [tensor for attr, tensor in zip(attrs, tensors)] - - def get_tensor(self, *args, **kwargs) -> TensorType: - """ - Synchronously obtains a :class:`FeatureTensorType` object from the - feature store. Feature store implementors guarantee that the call - :obj:`get_tensor(put_tensor(tensor, attr), attr) = tensor` holds. - - Parameters - ---------- - **attr (TensorAttr): Any relevant tensor attributes that correspond - to the feature tensor. See the :class:`TensorAttr` - documentation for required and optional attributes. It is the - job of implementations of a :class:`FeatureStore` to store this - metadata in a meaningful way that allows for tensor retrieval - from a :class:`TensorAttr` object. - - Returns - ------- - FeatureTensorType: a Tensor of the same type as the index. - - Raises - ------ - KeyError: if the tensor corresponding to attr was not found. - ValueError: if the input `TensorAttr` is not fully specified. - """ - - attr = self._tensor_attr_cls.cast(*args, **kwargs) - attr = self._infer_unspecified_attr(attr) - - if not attr.is_fully_specified(): - raise ValueError( - f"The input TensorAttr '{attr}' is not fully " - f"specified. Please fully specify the input by " - f"specifying all 'UNSET' fields." 
- ) - - tensor = self._get_tensor(attr) - if tensor is None: - raise KeyError(f"A tensor corresponding to '{attr}' was not found") - return tensor - - def _get_tensor_size(self, attr: CuGraphTensorAttr) -> Union[List, int]: - return self._get_tensor(attr).size() - - def get_tensor_size(self, *args, **kwargs) -> Union[List, int]: - """ - Obtains the size of a tensor given its attributes, or :obj:`None` - if the tensor does not exist. - """ - attr = self._tensor_attr_cls.cast(*args, **kwargs) - if not attr.is_set("index"): - attr.index = None - return self._get_tensor_size(attr) - - def _remove_tensor(self, attr): - raise NotImplementedError("Removing features not supported") - - def _infer_unspecified_attr(self, attr: CuGraphTensorAttr) -> CuGraphTensorAttr: - if attr.properties == _field_status.UNSET: - # attempt to infer property names - if attr.group_name in self._tensor_attr_dict: - for n in self._tensor_attr_dict[attr.group_name]: - if attr.attr_name == n.attr_name: - attr.properties = n.properties - else: - raise KeyError(f"Invalid group name {attr.group_name}") - - if attr.dtype == _field_status.UNSET: - # attempt to infer dtype - if attr.group_name in self._tensor_attr_dict: - for n in self._tensor_attr_dict[attr.group_name]: - if attr.attr_name == n.attr_name: - attr.dtype = n.dtype - - return attr - - def filter( - self, - format: str, - node_dict: Dict[str, torch.Tensor], - row_dict: Dict[str, torch.Tensor], - col_dict: Dict[str, torch.Tensor], - edge_dict: Dict[str, Tuple[torch.Tensor]], - ) -> torch_geometric.data.HeteroData: - """ - Parameters - ---------- - format: str - COO or CSC - node_dict: Dict[str, torch.Tensor] - IDs of nodes in original store being outputted - row_dict: Dict[str, torch.Tensor] - Renumbered output edge index row - col_dict: Dict[str, torch.Tensor] - Renumbered output edge index column - edge_dict: Dict[str, Tuple[torch.Tensor]] - Currently unused original edge mapping - """ - data = torch_geometric.data.HeteroData() - - # TODO 
use torch_geometric.EdgeIndex in release 24.04 (Issue #4051) - for attr in self.get_all_edge_attrs(): - key = attr.edge_type - if key in row_dict and key in col_dict: - if format == "CSC": - data.put_edge_index( - (row_dict[key], col_dict[key]), - edge_type=key, - layout="csc", - is_sorted=True, - ) - else: - data[key].edge_index = torch.stack( - [ - row_dict[key], - col_dict[key], - ], - dim=0, - ) - - required_attrs = [] - # To prevent copying multiple times, we use a cache; - # the original node_dict serves as the gpu cache if needed - node_dict_cpu = {} - for attr in self.get_all_tensor_attrs(): - if attr.group_name in node_dict: - device = self.__features.get_storage(attr.group_name, attr.attr_name) - attr.index = node_dict[attr.group_name] - if not isinstance(attr.index, torch.Tensor): - raise ValueError("Node index must be a tensor!") - if attr.index.is_cuda and device == "cpu": - if attr.group_name not in node_dict_cpu: - node_dict_cpu[attr.group_name] = attr.index.cpu() - attr.index = node_dict_cpu[attr.group_name] - elif attr.index.is_cpu and device == "cuda": - node_dict_cpu[attr.group_name] = attr.index - node_dict[attr.group_name] = attr.index.cuda() - attr.index = node_dict[attr.group_name] - - required_attrs.append(attr) - data[attr.group_name].num_nodes = attr.index.size(0) - - tensors = self.multi_get_tensor(required_attrs) - for i, attr in enumerate(required_attrs): - data[attr.group_name][attr.attr_name] = tensors[i] - - return data - - def __len__(self): - return len(self.get_all_tensor_attrs()) diff --git a/python/cugraph-pyg/cugraph_pyg/data/feature_store.py b/python/cugraph-pyg/cugraph_pyg/data/feature_store.py deleted file mode 100644 index b6450e7b192..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/data/feature_store.py +++ /dev/null @@ -1,288 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -from typing import Optional, Tuple, List - -from cugraph.utilities.utils import import_optional, MissingModule - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") -tensordict = import_optional("tensordict") -wgth = import_optional("pylibwholegraph.torch") - - -class TensorDictFeatureStore( - object - if isinstance(torch_geometric, MissingModule) - else torch_geometric.data.FeatureStore -): - """ - A basic implementation of the PyG FeatureStore interface that stores - feature data in a single TensorDict. This type of feature store is - not distributed, so each node will have to load the entire graph's - features into memory. - """ - - def __init__(self): - """ - Constructs an empty TensorDictFeatureStore. 
- """ - super().__init__() - - self.__features = {} - - def _put_tensor( - self, - tensor: "torch_geometric.typing.FeatureTensorType", - attr: "torch_geometric.data.feature_store.TensorAttr", - ) -> bool: - if attr.group_name in self.__features: - td = self.__features[attr.group_name] - batch_size = td.batch_size[0] - - if attr.is_set("index"): - if attr.attr_name in td.keys(): - if attr.index.shape[0] != batch_size: - raise ValueError( - "Leading size of index tensor " - "does not match existing tensors for group name " - f"{attr.group_name}; Expected {batch_size}, " - f"got {attr.index.shape[0]}" - ) - td[attr.attr_name][attr.index] = tensor - return True - else: - warnings.warn( - "Ignoring index parameter " - f"(attribute does not exist for group {attr.group_name})" - ) - - if tensor.shape[0] != batch_size: - raise ValueError( - "Leading size of input tensor does not match " - f"existing tensors for group name {attr.group_name};" - f" Expected {batch_size}, got {tensor.shape[0]}" - ) - else: - batch_size = tensor.shape[0] - self.__features[attr.group_name] = tensordict.TensorDict( - {}, batch_size=batch_size - ) - - self.__features[attr.group_name][attr.attr_name] = tensor - return True - - def _get_tensor( - self, attr: "torch_geometric.data.feature_store.TensorAttr" - ) -> Optional["torch_geometric.typing.FeatureTensorType"]: - if attr.group_name not in self.__features: - return None - - if attr.attr_name not in self.__features[attr.group_name].keys(): - return None - - tensor = self.__features[attr.group_name][attr.attr_name] - return ( - tensor - if (attr.index is None or (not attr.is_set("index"))) - else tensor[attr.index] - ) - - def _remove_tensor( - self, attr: "torch_geometric.data.feature_store.TensorAttr" - ) -> bool: - if attr.group_name not in self.__features: - return False - - if attr.attr_name not in self.__features[attr.group_name].keys(): - return False - - del self.__features[attr.group_name][attr.attr_name] - return True - - def 
_get_tensor_size( - self, attr: "torch_geometric.data.feature_store.TensorAttr" - ) -> Tuple: - return self._get_tensor(attr).size() - - def get_all_tensor_attrs( - self, - ) -> List["torch_geometric.data.feature_store.TensorAttr"]: - attrs = [] - for group_name, td in self.__features.items(): - for attr_name in td.keys(): - attrs.append( - torch_geometric.data.feature_store.TensorAttr( - group_name, - attr_name, - ) - ) - - return attrs - - -class WholeFeatureStore( - object - if isinstance(torch_geometric, MissingModule) - else torch_geometric.data.FeatureStore -): - """ - A basic implementation of the PyG FeatureStore interface that stores - feature data in WholeGraph WholeMemory. This type of feature store is - distributed, and avoids data replication across workers. - - Data should be sliced before being passed into this feature store. - That means each worker should have its own partition and put_tensor - should be called for each worker's local partition. When calling - get_tensor, multi_get_tensor, etc., the entire tensor can be accessed - regardless of what worker's partition the desired slice of the tensor - is on. - """ - - def __init__(self, memory_type="distributed", location="cpu"): - """ - Constructs an empty WholeFeatureStore. - - Parameters - ---------- - memory_type: str (optional, default='distributed') - The memory type of this store. Options are - 'distributed', 'chunked', and 'continuous'. - For more information consult the WholeGraph - documentation. - location: str(optional, default='cpu') - The location ('cpu' or 'cuda') where data is stored. 
- """ - super().__init__() - - self.__features = {} - - self.__wg_comm = wgth.get_global_communicator() - self.__wg_type = memory_type - self.__wg_location = location - - def _put_tensor( - self, - tensor: "torch_geometric.typing.FeatureTensorType", - attr: "torch_geometric.data.feature_store.TensorAttr", - ) -> bool: - wg_comm_obj = self.__wg_comm - - if attr.is_set("index"): - if (attr.group_name, attr.attr_name) in self.__features: - raise NotImplementedError( - "Updating an embedding from an index" - " is not supported by WholeGraph." - ) - else: - warnings.warn( - "Ignoring index parameter " - f"(attribute does not exist for group {attr.group_name})" - ) - - if len(tensor.shape) > 2: - raise ValueError("Only 1-D or 2-D tensors are supported by WholeGraph.") - - rank = torch.distributed.get_rank() - world_size = torch.distributed.get_world_size() - - ld = torch.tensor(tensor.shape[0], device="cuda", dtype=torch.int64) - sizes = torch.empty((world_size,), device="cuda", dtype=torch.int64) - torch.distributed.all_gather_into_tensor(sizes, ld) - - sizes = sizes.cpu() - ld = sizes.sum() - - td = -1 if len(tensor.shape) == 1 else tensor.shape[1] - global_shape = [ - int(ld), - td if td > 0 else 1, - ] - - if td < 0: - tensor = tensor.reshape((tensor.shape[0], 1)) - - wg_embedding = wgth.create_wholememory_tensor( - wg_comm_obj, - self.__wg_type, - self.__wg_location, - global_shape, - tensor.dtype, - [global_shape[1], 1], - ) - - offset = sizes[:rank].sum() if rank > 0 else 0 - - wg_embedding.scatter( - tensor.clone(memory_format=torch.contiguous_format).cuda(), - torch.arange( - offset, offset + tensor.shape[0], dtype=torch.int64, device="cuda" - ).contiguous(), - ) - - wg_comm_obj.barrier() - - self.__features[attr.group_name, attr.attr_name] = (wg_embedding, td) - return True - - def _get_tensor( - self, attr: "torch_geometric.data.feature_store.TensorAttr" - ) -> Optional["torch_geometric.typing.FeatureTensorType"]: - if (attr.group_name, attr.attr_name) not in 
self.__features: - return None - - emb, td = self.__features[attr.group_name, attr.attr_name] - - if attr.index is None or (not attr.is_set("index")): - attr.index = torch.arange(emb.shape[0], dtype=torch.int64) - - attr.index = attr.index.cuda() - t = emb.gather( - attr.index, - force_dtype=emb.dtype, - ) - - if td < 0: - t = t.reshape((t.shape[0],)) - - return t - - def _remove_tensor( - self, attr: "torch_geometric.data.feature_store.TensorAttr" - ) -> bool: - if (attr.group_name, attr.attr_name) not in self.__features: - return False - - del self.__features[attr.group_name, attr.attr_name] - return True - - def _get_tensor_size( - self, attr: "torch_geometric.data.feature_store.TensorAttr" - ) -> Tuple: - return self.__features[attr.group_name, attr.attr_name].shape - - def get_all_tensor_attrs( - self, - ) -> List["torch_geometric.data.feature_store.TensorAttr"]: - attrs = [] - for (group_name, attr_name) in self.__features.keys(): - attrs.append( - torch_geometric.data.feature_store.TensorAttr( - group_name=group_name, - attr_name=attr_name, - ) - ) - - return attrs diff --git a/python/cugraph-pyg/cugraph_pyg/data/graph_store.py b/python/cugraph-pyg/cugraph_pyg/data/graph_store.py deleted file mode 100644 index c47dda5eaa5..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/data/graph_store.py +++ /dev/null @@ -1,365 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import cupy -import cudf -import pandas - -import pylibcugraph - -from cugraph.utilities.utils import import_optional, MissingModule -from cugraph.gnn.comms import cugraph_comms_get_raft_handle - -from typing import Union, Optional, List, Dict, Tuple - - -# Have to use import_optional even though these are required -# dependencies in order to build properly. -torch_geometric = import_optional("torch_geometric") -torch = import_optional("torch") -tensordict = import_optional("tensordict") - -TensorType = Union["torch.Tensor", cupy.ndarray, np.ndarray, cudf.Series, pandas.Series] - - -class GraphStore( - object - if isinstance(torch_geometric, MissingModule) - else torch_geometric.data.GraphStore -): - """ - cuGraph-backed PyG GraphStore implementation that distributes - the graph across workers. This object uses lazy graph creation. - Users can repeatedly call put_edge_index, and the tensors won't - be converted into a cuGraph graph until one is needed - (i.e. when creating a loader). Supports - single-node/single-GPU, single-node/multi-GPU, and - multi-node/multi-GPU graph storage. - - Each worker should have a slice of the graph locally, and - call put_edge_index with its slice. - """ - - def __init__(self, is_multi_gpu: bool = False): - """ - Constructs a new, empty GraphStore object. This object - represents one slice of a graph on particular worker. 
- """ - self.__edge_indices = tensordict.TensorDict({}, batch_size=(2,)) - self.__sizes = {} - - self.__handle = None - self.__is_multi_gpu = is_multi_gpu - - self.__clear_graph() - - super().__init__() - - def __clear_graph(self): - self.__graph = None - self.__vertex_offsets = None - self.__weight_attr = None - - def _put_edge_index( - self, - edge_index: "torch_geometric.typing.EdgeTensorType", - edge_attr: "torch_geometric.data.EdgeAttr", - ) -> bool: - if edge_attr.layout != torch_geometric.data.graph_store.EdgeLayout.COO: - raise ValueError("Only COO format supported") - - if isinstance(edge_index, (cupy.ndarray, cudf.Series)): - edge_index = torch.as_tensor(edge_index, device="cuda") - elif isinstance(edge_index, (np.ndarray)): - edge_index = torch.as_tensor(edge_index, device="cpu") - elif isinstance(edge_index, pandas.Series): - edge_index = torch.as_tensor(edge_index.values, device="cpu") - elif isinstance(edge_index, cudf.Series): - edge_index = torch.as_tensor(edge_index.values, device="cuda") - - self.__edge_indices[edge_attr.edge_type] = torch.stack( - [edge_index[0], edge_index[1]] - ) - self.__sizes[edge_attr.edge_type] = edge_attr.size - - # invalidate the graph - self.__clear_graph() - return True - - def _get_edge_index( - self, edge_attr: "torch_geometric.data.EdgeAttr" - ) -> Optional["torch_geometric.typing.EdgeTensorType"]: - ei = torch_geometric.EdgeIndex(self.__edge_indices[edge_attr.edge_type]) - - if edge_attr.layout == "csr": - return ei.sort_by("row").values.get_csr() - elif edge_attr.layout == "csc": - return ei.sort_by("col").values.get_csc() - - return ei - - def _remove_edge_index(self, edge_attr: "torch_geometric.data.EdgeAttr") -> bool: - del self.__edge_indices[edge_attr.edge_type] - - # invalidate the graph - self.__clear_graph() - return True - - def get_all_edge_attrs(self) -> List["torch_geometric.data.EdgeAttr"]: - attrs = [] - for et in self.__edge_indices.keys(leaves_only=True, include_nested=True): - attrs.append( - 
torch_geometric.data.EdgeAttr( - edge_type=et, layout="coo", is_sorted=False, size=self.__sizes[et] - ) - ) - - return attrs - - @property - def is_multi_gpu(self): - return self.__is_multi_gpu - - @property - def _resource_handle(self): - if self.__handle is None: - if self.is_multi_gpu: - self.__handle = pylibcugraph.ResourceHandle( - cugraph_comms_get_raft_handle().getHandle() - ) - else: - self.__handle = pylibcugraph.ResourceHandle() - return self.__handle - - @property - def _graph(self) -> Union[pylibcugraph.SGGraph, pylibcugraph.MGGraph]: - graph_properties = pylibcugraph.GraphProperties( - is_multigraph=True, is_symmetric=False - ) - - if self.__graph is None: - edgelist_dict = self.__get_edgelist() - - if self.is_multi_gpu: - rank = torch.distributed.get_rank() - world_size = torch.distributed.get_world_size() - - vertices_array = cupy.arange( - sum(self._num_vertices().values()), dtype="int64" - ) - vertices_array = cupy.array_split(vertices_array, world_size)[rank] - - self.__graph = pylibcugraph.MGGraph( - self._resource_handle, - graph_properties, - [cupy.asarray(edgelist_dict["src"]).astype("int64")], - [cupy.asarray(edgelist_dict["dst"]).astype("int64")], - vertices_array=[vertices_array], - edge_id_array=[cupy.asarray(edgelist_dict["eid"])], - edge_type_array=[cupy.asarray(edgelist_dict["etp"])], - weight_array=[cupy.asarray(edgelist_dict["wgt"])] - if "wgt" in edgelist_dict - else None, - ) - else: - self.__graph = pylibcugraph.SGGraph( - self._resource_handle, - graph_properties, - cupy.asarray(edgelist_dict["src"]).astype("int64"), - cupy.asarray(edgelist_dict["dst"]).astype("int64"), - vertices_array=cupy.arange( - sum(self._num_vertices().values()), dtype="int64" - ), - edge_id_array=cupy.asarray(edgelist_dict["eid"]), - edge_type_array=cupy.asarray(edgelist_dict["etp"]), - weight_array=cupy.asarray(edgelist_dict["wgt"]) - if "wgt" in edgelist_dict - else None, - ) - - return self.__graph - - def _num_vertices(self) -> Dict[str, int]: - 
num_vertices = {} - for edge_attr in self.get_all_edge_attrs(): - if edge_attr.size is not None: - num_vertices[edge_attr.edge_type[0]] = ( - max(num_vertices[edge_attr.edge_type[0]], edge_attr.size[0]) - if edge_attr.edge_type[0] in num_vertices - else edge_attr.size[0] - ) - num_vertices[edge_attr.edge_type[2]] = ( - max(num_vertices[edge_attr.edge_type[2]], edge_attr.size[1]) - if edge_attr.edge_type[2] in num_vertices - else edge_attr.size[1] - ) - else: - if edge_attr.edge_type[0] != edge_attr.edge_type[2]: - if edge_attr.edge_type[0] not in num_vertices: - num_vertices[edge_attr.edge_type[0]] = int( - self.__edge_indices[edge_attr.edge_type][0].max() + 1 - ) - if edge_attr.edge_type[2] not in num_vertices: - num_vertices[edge_attr.edge_type[1]] = int( - self.__edge_indices[edge_attr.edge_type][1].max() + 1 - ) - elif edge_attr.edge_type[0] not in num_vertices: - num_vertices[edge_attr.edge_type[0]] = int( - self.__edge_indices[edge_attr.edge_type].max() + 1 - ) - - if self.is_multi_gpu: - vtypes = num_vertices.keys() - for vtype in vtypes: - sz = torch.tensor(num_vertices[vtype], device="cuda") - torch.distributed.all_reduce(sz, op=torch.distributed.ReduceOp.MAX) - num_vertices[vtype] = int(sz) - return num_vertices - - @property - def _vertex_offsets(self) -> Dict[str, int]: - if self.__vertex_offsets is None: - num_vertices = self._num_vertices() - ordered_keys = sorted(list(num_vertices.keys())) - self.__vertex_offsets = {} - offset = 0 - for vtype in ordered_keys: - self.__vertex_offsets[vtype] = offset - offset += num_vertices[vtype] - - return dict(self.__vertex_offsets) - - @property - def is_homogeneous(self) -> bool: - return len(self._vertex_offsets) == 1 - - def _set_weight_attr(self, attr: Tuple["torch_geometric.data.FeatureStore", str]): - if attr != self.__weight_attr: - self.__clear_graph() - self.__weight_attr = attr - - def __get_weight_tensor( - self, - sorted_keys: List[Tuple[str, str, str]], - start_offsets: "torch.Tensor", - num_edges_t: 
"torch.Tensor", - ): - feature_store, attr_name = self.__weight_attr - - weights = [] - for i, et in enumerate(sorted_keys): - ix = torch.arange( - start_offsets[i], - start_offsets[i] + num_edges_t[i], - dtype=torch.int64, - device="cpu", - ) - - weights.append(feature_store[et, attr_name][ix]) - - return torch.concat(weights) - - def __get_edgelist(self): - """ - Returns - ------- - Dict[str, torch.Tensor] with the following keys: - src: source vertices (int64) - Note that src is the 2nd element of the PyG edge index. - dst: destination vertices (int64) - Note that dst is the 1st element of the PyG edge index. - eid: edge ids for each edge (int64) - Note that these start from 0 for each edge type. - etp: edge types for each edge (int32) - Note that these are in lexicographic order. - """ - sorted_keys = sorted( - list(self.__edge_indices.keys(leaves_only=True, include_nested=True)) - ) - - # note that this still follows the PyG convention of (dst, rel, src) - # i.e. (author, writes, paper): [[0,1,2],[2,0,1]] is referring to a - # cuGraph graph where (paper 2) -> (author 0), (paper 0) -> (author 1), - # and (paper 1) -> (author 0) - edge_index = torch.concat( - [ - torch.stack( - [ - self.__edge_indices[dst_type, rel_type, src_type][0] - + self._vertex_offsets[dst_type], - self.__edge_indices[dst_type, rel_type, src_type][1] - + self._vertex_offsets[src_type], - ] - ) - for (dst_type, rel_type, src_type) in sorted_keys - ], - axis=1, - ).cuda() - - edge_type_array = torch.arange( - len(sorted_keys), dtype=torch.int32, device="cuda" - ).repeat_interleave( - torch.tensor( - [self.__edge_indices[et].shape[1] for et in sorted_keys], - device="cuda", - dtype=torch.int64, - ) - ) - - num_edges_t = torch.tensor( - [self.__edge_indices[et].shape[1] for et in sorted_keys], device="cuda" - ) - - if self.is_multi_gpu: - rank = torch.distributed.get_rank() - world_size = torch.distributed.get_world_size() - - num_edges_all_t = torch.empty( - world_size, num_edges_t.numel(), 
dtype=torch.int64, device="cuda" - ) - torch.distributed.all_gather_into_tensor(num_edges_all_t, num_edges_t) - - start_offsets = num_edges_all_t[:rank].T.sum(axis=1) - else: - rank = 0 - start_offsets = torch.zeros( - (len(sorted_keys),), dtype=torch.int64, device="cuda" - ) - num_edges_all_t = num_edges_t.reshape((1, num_edges_t.numel())) - - edge_id_array = torch.concat( - [ - torch.arange( - start_offsets[i], - start_offsets[i] + num_edges_all_t[rank][i], - dtype=torch.int64, - device="cuda", - ) - for i in range(len(sorted_keys)) - ] - ) - - d = { - "dst": edge_index[0], - "src": edge_index[1], - "etp": edge_type_array, - "eid": edge_id_array, - } - - if self.__weight_attr is not None: - d["wgt"] = self.__get_weight_tensor( - sorted_keys, start_offsets.cpu(), num_edges_t.cpu() - ).cuda() - - return d diff --git a/python/cugraph-pyg/cugraph_pyg/examples/cugraph_dist_sampling_mg.py b/python/cugraph-pyg/cugraph_pyg/examples/cugraph_dist_sampling_mg.py deleted file mode 100644 index 31cbaf69ca5..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/examples/cugraph_dist_sampling_mg.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This example shows how to use cuGraph nccl-only comms, pylibcuGraph, -# and PyTorch DDP to run a multi-GPU sampling workflow. Most users of the -# GNN packages will not interact with cuGraph directly. 
This example -# is intented for users who want to extend cuGraph within a DDP workflow. - -import os -import re -import tempfile - -import numpy as np -import torch -import torch.multiprocessing as tmp -import torch.distributed as dist - -import cudf - -from cugraph.gnn import ( - cugraph_comms_init, - cugraph_comms_shutdown, - cugraph_comms_create_unique_id, - cugraph_comms_get_raft_handle, - DistSampleWriter, - UniformNeighborSampler, -) - -from pylibcugraph import MGGraph, ResourceHandle, GraphProperties - -from ogb.nodeproppred import NodePropPredDataset - - -def init_pytorch(rank, world_size): - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "12355" - dist.init_process_group("nccl", rank=rank, world_size=world_size) - - -def sample(rank: int, world_size: int, uid, edgelist, directory): - init_pytorch(rank, world_size) - - device = rank - cugraph_comms_init(rank, world_size, uid, device) - - print(f"rank {rank} initialized cugraph") - - src = cudf.Series(np.array_split(edgelist[0], world_size)[rank]) - dst = cudf.Series(np.array_split(edgelist[1], world_size)[rank]) - - seeds_per_rank = 50 - seeds = cudf.Series(np.arange(rank * seeds_per_rank, (rank + 1) * seeds_per_rank)) - handle = ResourceHandle(cugraph_comms_get_raft_handle().getHandle()) - - print("constructing graph") - G = MGGraph( - handle, - GraphProperties(is_multigraph=True, is_symmetric=False), - [src], - [dst], - ) - print("graph constructed") - - sample_writer = DistSampleWriter(directory=directory, batches_per_partition=2) - sampler = UniformNeighborSampler( - G, - sample_writer, - fanout=[5, 5], - ) - - sampler.sample_from_nodes(seeds, batch_size=16, random_state=62) - - dist.barrier() - cugraph_comms_shutdown() - print(f"rank {rank} shut down cugraph") - - -def main(): - world_size = torch.cuda.device_count() - uid = cugraph_comms_create_unique_id() - - dataset = NodePropPredDataset("ogbn-products") - el = dataset[0][0]["edge_index"].astype("int64") - - with 
tempfile.TemporaryDirectory() as directory: - tmp.spawn( - sample, - args=(world_size, uid, el, directory), - nprocs=world_size, - ) - - print("Printing samples...") - for file in os.listdir(directory): - m = re.match(r"batch=([0-9]+)\.([0-9]+)\-([0-9]+)\.([0-9]+)\.parquet", file) - rank, start, _, end = int(m[1]), int(m[2]), int(m[3]), int(m[4]) - print(f"File: {file} (batches {start} to {end} for rank {rank})") - print(cudf.read_parquet(os.path.join(directory, file))) - print("\n") - - -if __name__ == "__main__": - main() diff --git a/python/cugraph-pyg/cugraph_pyg/examples/cugraph_dist_sampling_sg.py b/python/cugraph-pyg/cugraph_pyg/examples/cugraph_dist_sampling_sg.py deleted file mode 100644 index de45acc7456..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/examples/cugraph_dist_sampling_sg.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This example shows how to use cuGraph nccl-only comms, pylibcuGraph, -# and PyTorch to run a single-GPU sampling workflow. Most users of the -# GNN packages will not interact with cuGraph directly. This example -# is intented for users who want to extend cuGraph within a PyTorch workflow. 
- -import os -import re -import tempfile - -import numpy as np - -import cudf - -from cugraph.gnn import ( - DistSampleWriter, - UniformNeighborSampler, -) - -from pylibcugraph import SGGraph, ResourceHandle, GraphProperties - -from ogb.nodeproppred import NodePropPredDataset - - -def sample(edgelist, directory): - src = cudf.Series(edgelist[0]) - dst = cudf.Series(edgelist[1]) - - seeds_per_rank = 50 - seeds = cudf.Series(np.arange(0, seeds_per_rank)) - - print("constructing graph") - G = SGGraph( - ResourceHandle(), - GraphProperties(is_multigraph=True, is_symmetric=False), - src, - dst, - ) - print("graph constructed") - - sample_writer = DistSampleWriter(directory=directory, batches_per_partition=2) - sampler = UniformNeighborSampler( - G, - sample_writer, - fanout=[5, 5], - compression="CSR", - retain_original_seeds=True, - ) - - sampler.sample_from_nodes(seeds, batch_size=16, random_state=62) - - -def main(): - dataset = NodePropPredDataset("ogbn-products") - el = dataset[0][0]["edge_index"].astype("int64") - - with tempfile.TemporaryDirectory() as directory: - sample(el, directory) - - print("Printing samples...") - for file in os.listdir(directory): - m = re.match(r"batch=([0-9]+)\.([0-9]+)\-([0-9]+)\.([0-9]+)\.parquet", file) - rank, start, _, end = int(m[1]), int(m[2]), int(m[3]), int(m[4]) - print(f"File: {file} (batches {start} to {end} for rank {rank})") - print(cudf.read_parquet(os.path.join(directory, file))) - print("\n") - - -if __name__ == "__main__": - main() diff --git a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py deleted file mode 100644 index 127ca809d91..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py +++ /dev/null @@ -1,446 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Multi-node, multi-GPU example with WholeGraph feature storage. -# Can be run with torchrun. - -import argparse -import os -import warnings -import tempfile -import time -import json - - -import torch -import torch.distributed as dist -import torch.nn.functional as F -from ogb.nodeproppred import PygNodePropPredDataset -from torch.nn.parallel import DistributedDataParallel - -import torch_geometric - -from cugraph.gnn import ( - cugraph_comms_init, - cugraph_comms_shutdown, - cugraph_comms_create_unique_id, -) - -from pylibwholegraph.torch.initialize import ( - init as wm_init, - finalize as wm_finalize, -) - -# Allow computation on objects that are larger than GPU memory -# https://docs.rapids.ai/api/cudf/stable/developer_guide/library_design/#spilling-to-host-memory -os.environ["CUDF_SPILL"] = "1" - -# Ensures that a CUDA context is not created on import of rapids. 
-# Allows pytorch to create the context instead -os.environ["RAPIDS_NO_INITIALIZE"] = "1" - - -def init_pytorch_worker(global_rank, local_rank, world_size, cugraph_id): - import rmm - - rmm.reinitialize( - devices=local_rank, - managed_memory=True, - pool_allocator=True, - ) - - import cupy - - cupy.cuda.Device(local_rank).use() - from rmm.allocators.cupy import rmm_cupy_allocator - - cupy.cuda.set_allocator(rmm_cupy_allocator) - - from cugraph.testing.mg_utils import enable_spilling - - enable_spilling() - - torch.cuda.set_device(local_rank) - - cugraph_comms_init( - rank=global_rank, world_size=world_size, uid=cugraph_id, device=local_rank - ) - - wm_init(global_rank, world_size, local_rank, torch.cuda.device_count()) - - -def partition_data(dataset, split_idx, edge_path, feature_path, label_path, meta_path): - data = dataset[0] - - # Split and save edge index - os.makedirs( - edge_path, - exist_ok=True, - ) - for (r, e) in enumerate(torch.tensor_split(data.edge_index, world_size, dim=1)): - rank_path = os.path.join(edge_path, f"rank={r}.pt") - torch.save( - e.clone(), - rank_path, - ) - - # Split and save features - os.makedirs( - feature_path, - exist_ok=True, - ) - - for (r, f) in enumerate(torch.tensor_split(data.x, world_size)): - rank_path = os.path.join(feature_path, f"rank={r}_x.pt") - torch.save( - f.clone(), - rank_path, - ) - for (r, f) in enumerate(torch.tensor_split(data.y, world_size)): - rank_path = os.path.join(feature_path, f"rank={r}_y.pt") - torch.save( - f.clone(), - rank_path, - ) - - # Split and save labels - os.makedirs( - label_path, - exist_ok=True, - ) - for (d, i) in split_idx.items(): - i_parts = torch.tensor_split(i, world_size) - for r, i_part in enumerate(i_parts): - rank_path = os.path.join(label_path, f"rank={r}") - os.makedirs(rank_path, exist_ok=True) - torch.save(i_part, os.path.join(rank_path, f"{d}.pt")) - - # Save metadata - meta = { - "num_classes": int(dataset.num_classes), - "num_features": int(dataset.num_features), - 
"num_nodes": int(data.num_nodes), - } - with open(meta_path, "w") as f: - json.dump(meta, f) - - -def load_partitioned_data( - rank, edge_path, feature_path, label_path, meta_path, wg_mem_type -): - from cugraph_pyg.data import GraphStore, WholeFeatureStore - - graph_store = GraphStore(is_multi_gpu=True) - feature_store = WholeFeatureStore(memory_type=wg_mem_type) - - # Load metadata - with open(meta_path, "r") as f: - meta = json.load(f) - - # Load labels - split_idx = {} - for split in ["train", "test", "valid"]: - split_idx[split] = torch.load( - os.path.join(label_path, f"rank={rank}", f"{split}.pt") - ) - - # Load features - feature_store["node", "x"] = torch.load( - os.path.join(feature_path, f"rank={rank}_x.pt") - ) - feature_store["node", "y"] = torch.load( - os.path.join(feature_path, f"rank={rank}_y.pt") - ) - - # Load edge index - eix = torch.load(os.path.join(edge_path, f"rank={rank}.pt")) - graph_store[ - ("node", "rel", "node"), "coo", False, (meta["num_nodes"], meta["num_nodes"]) - ] = eix - - return (feature_store, graph_store), split_idx, meta - - -def run_train( - global_rank, - data, - split_idx, - world_size, - device, - model, - epochs, - batch_size, - fan_out, - num_classes, - wall_clock_start, - tempdir=None, - num_layers=3, - in_memory=False, - seeds_per_call=-1, -): - optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0005) - - kwargs = dict( - num_neighbors=[fan_out] * num_layers, - batch_size=batch_size, - ) - # Set Up Neighbor Loading - from cugraph_pyg.loader import NeighborLoader - - ix_train = split_idx["train"].cuda() - train_path = None if in_memory else os.path.join(tempdir, f"train_{global_rank}") - if train_path: - os.mkdir(train_path) - train_loader = NeighborLoader( - data, - input_nodes=ix_train, - directory=train_path, - shuffle=True, - drop_last=True, - local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None, - **kwargs, - ) - - ix_test = split_idx["test"].cuda() - test_path = None if 
in_memory else os.path.join(tempdir, f"test_{global_rank}") - if test_path: - os.mkdir(test_path) - test_loader = NeighborLoader( - data, - input_nodes=ix_test, - directory=test_path, - shuffle=True, - drop_last=True, - local_seeds_per_call=80000, - **kwargs, - ) - - ix_valid = split_idx["valid"].cuda() - valid_path = None if in_memory else os.path.join(tempdir, f"valid_{global_rank}") - if valid_path: - os.mkdir(valid_path) - valid_loader = NeighborLoader( - data, - input_nodes=ix_valid, - directory=valid_path, - shuffle=True, - drop_last=True, - local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None, - **kwargs, - ) - - dist.barrier() - - eval_steps = 1000 - warmup_steps = 20 - dist.barrier() - torch.cuda.synchronize() - - if global_rank == 0: - prep_time = round(time.perf_counter() - wall_clock_start, 2) - print("Total time before training begins (prep_time) =", prep_time, "seconds") - print("Beginning training...") - - for epoch in range(epochs): - for i, batch in enumerate(train_loader): - if i == warmup_steps: - torch.cuda.synchronize() - start = time.time() - - batch = batch.to(device) - batch_size = batch.batch_size - - batch.y = batch.y.view(-1).to(torch.long) - optimizer.zero_grad() - out = model(batch.x, batch.edge_index) - loss = F.cross_entropy(out[:batch_size], batch.y[:batch_size]) - loss.backward() - optimizer.step() - if global_rank == 0 and i % 10 == 0: - print( - "Epoch: " - + str(epoch) - + ", Iteration: " - + str(i) - + ", Loss: " - + str(loss) - ) - nb = i + 1.0 - - if global_rank == 0: - print( - "Average Training Iteration Time:", - (time.time() - start) / (nb - warmup_steps), - "s/iter", - ) - - with torch.no_grad(): - total_correct = total_examples = 0 - for i, batch in enumerate(valid_loader): - if i >= eval_steps: - break - - batch = batch.to(device) - batch_size = batch.batch_size - - batch.y = batch.y.to(torch.long) - out = model(batch.x, batch.edge_index)[:batch_size] - - pred = out.argmax(dim=-1) - y = 
batch.y[:batch_size].view(-1).to(torch.long) - - total_correct += int((pred == y).sum()) - total_examples += y.size(0) - - acc_val = total_correct / total_examples - if global_rank == 0: - print( - f"Validation Accuracy: {acc_val * 100.0:.4f}%", - ) - - torch.cuda.synchronize() - - with torch.no_grad(): - total_correct = total_examples = 0 - for i, batch in enumerate(test_loader): - batch = batch.to(device) - batch_size = batch.batch_size - - batch.y = batch.y.to(torch.long) - out = model(batch.x, batch.edge_index)[:batch_size] - - pred = out.argmax(dim=-1) - y = batch.y[:batch_size].view(-1).to(torch.long) - - total_correct += int((pred == y).sum()) - total_examples += y.size(0) - - acc_test = total_correct / total_examples - if global_rank == 0: - print( - f"Test Accuracy: {acc_test * 100.0:.4f}%", - ) - - if global_rank == 0: - total_time = round(time.perf_counter() - wall_clock_start, 2) - print("Total Program Runtime (total_time) =", total_time, "seconds") - print("total_time - prep_time =", total_time - prep_time, "seconds") - - wm_finalize() - cugraph_comms_shutdown() - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--hidden_channels", type=int, default=256) - parser.add_argument("--num_layers", type=int, default=2) - parser.add_argument("--lr", type=float, default=0.001) - parser.add_argument("--epochs", type=int, default=4) - parser.add_argument("--batch_size", type=int, default=1024) - parser.add_argument("--fan_out", type=int, default=30) - parser.add_argument("--tempdir_root", type=str, default=None) - parser.add_argument("--dataset_root", type=str, default="dataset") - parser.add_argument("--dataset", type=str, default="ogbn-products") - parser.add_argument("--skip_partition", action="store_true") - parser.add_argument("--wg_mem_type", type=str, default="distributed") - - parser.add_argument("--in_memory", action="store_true", default=False) - parser.add_argument("--seeds_per_call", type=int, default=-1) - - return 
parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - wall_clock_start = time.perf_counter() - - if "LOCAL_RANK" in os.environ: - dist.init_process_group("nccl") - world_size = dist.get_world_size() - global_rank = dist.get_rank() - local_rank = int(os.environ["LOCAL_RANK"]) - device = torch.device(local_rank) - - # Create the uid needed for cuGraph comms - if global_rank == 0: - cugraph_id = [cugraph_comms_create_unique_id()] - else: - cugraph_id = [None] - dist.broadcast_object_list(cugraph_id, src=0, device=device) - cugraph_id = cugraph_id[0] - - init_pytorch_worker(global_rank, local_rank, world_size, cugraph_id) - - # Split the data - edge_path = os.path.join(args.dataset_root, args.dataset + "_eix_part") - feature_path = os.path.join(args.dataset_root, args.dataset + "_fea_part") - label_path = os.path.join(args.dataset_root, args.dataset + "_label_part") - meta_path = os.path.join(args.dataset_root, args.dataset + "_meta.json") - - # We partition the data to avoid loading it in every worker, which will - # waste memory and can lead to an out of memory exception. - # cugraph_pyg.GraphStore and cugraph_pyg.WholeFeatureStore are always - # constructed from partitions of the edge index and features, respectively, - # so this works well. 
- if not args.skip_partition and global_rank == 0: - dataset = PygNodePropPredDataset(name=args.dataset, root=args.dataset_root) - split_idx = dataset.get_idx_split() - - partition_data( - dataset, - split_idx, - meta_path=meta_path, - label_path=label_path, - feature_path=feature_path, - edge_path=edge_path, - ) - - dist.barrier() - data, split_idx, meta = load_partitioned_data( - rank=global_rank, - edge_path=edge_path, - feature_path=feature_path, - label_path=label_path, - meta_path=meta_path, - wg_mem_type=args.wg_mem_type, - ) - dist.barrier() - - model = torch_geometric.nn.models.GCN( - meta["num_features"], - args.hidden_channels, - args.num_layers, - meta["num_classes"], - ).to(device) - model = DistributedDataParallel(model, device_ids=[local_rank]) - - with tempfile.TemporaryDirectory(dir=args.tempdir_root) as tempdir: - run_train( - global_rank, - data, - split_idx, - world_size, - device, - model, - args.epochs, - args.batch_size, - args.fan_out, - meta["num_classes"], - wall_clock_start, - tempdir, - args.num_layers, - args.in_memory, - args.seeds_per_call, - ) - else: - warnings.warn("This script should be run with 'torchrun`. Exiting.") diff --git a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py deleted file mode 100644 index 0f9c39bf04d..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py +++ /dev/null @@ -1,236 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import time -import argparse -import tempfile -import os -import warnings - -from typing import Optional, Tuple, Dict - -import torch -import cupy - -import rmm -from rmm.allocators.cupy import rmm_cupy_allocator -from rmm.allocators.torch import rmm_torch_allocator - -# Must change allocators immediately upon import -# or else other imports will cause memory to be -# allocated and prevent changing the allocator -rmm.reinitialize(devices=[0], pool_allocator=True, managed_memory=True) -cupy.cuda.set_allocator(rmm_cupy_allocator) -torch.cuda.memory.change_current_allocator(rmm_torch_allocator) - -import torch.nn.functional as F # noqa: E402 -import torch_geometric # noqa: E402 -import cugraph_pyg # noqa: E402 -from cugraph_pyg.loader import NeighborLoader # noqa: E402 - -# Enable cudf spilling to save gpu memory -from cugraph.testing.mg_utils import enable_spilling # noqa: E402 - -enable_spilling() - - -def train(epoch: int): - model.train() - for i, batch in enumerate(train_loader): - if i == warmup_steps: - torch.cuda.synchronize() - start_avg_time = time.perf_counter() - batch = batch.to(device) - - optimizer.zero_grad() - batch_size = batch.batch_size - out = model(batch.x, batch.edge_index)[:batch_size] - y = batch.y[:batch_size].view(-1).to(torch.long) - - loss = F.cross_entropy(out, y) - loss.backward() - optimizer.step() - - if i % 10 == 0: - print(f"Epoch: {epoch:02d}, Iteration: {i}, Loss: {loss:.4f}") - torch.cuda.synchronize() - print( - f"Average Training Iteration Time (s/iter): \ - {(time.perf_counter() - start_avg_time) / (i - warmup_steps):.6f}" - ) - - -@torch.no_grad() -def test(loader: NeighborLoader, val_steps: Optional[int] = None): - model.eval() - - total_correct = total_examples = 0 - for i, batch in enumerate(loader): - if val_steps is not None and i >= val_steps: - break - batch = batch.to(device) - batch_size = batch.batch_size - out 
= model(batch.x, batch.edge_index)[:batch_size] - pred = out.argmax(dim=-1) - y = batch.y[:batch_size].view(-1).to(torch.long) - - total_correct += int((pred == y).sum()) - total_examples += y.size(0) - - return total_correct / total_examples - - -def create_loader( - data, - num_neighbors, - input_nodes, - replace, - batch_size, - samples_dir, - stage_name, - local_seeds_per_call, -): - if samples_dir is not None: - directory = os.path.join(samples_dir, stage_name) - os.mkdir(directory) - else: - directory = None - return NeighborLoader( - data, - num_neighbors=num_neighbors, - input_nodes=input_nodes, - replace=replace, - batch_size=batch_size, - directory=directory, - local_seeds_per_call=local_seeds_per_call, - ) - - -def load_data( - dataset, dataset_root -) -> Tuple[ - Tuple[torch_geometric.data.FeatureStore, torch_geometric.data.GraphStore], - Dict[str, torch.Tensor], - int, - int, -]: - from ogb.nodeproppred import PygNodePropPredDataset - - dataset = PygNodePropPredDataset(dataset, root=dataset_root) - split_idx = dataset.get_idx_split() - data = dataset[0] - - graph_store = cugraph_pyg.data.GraphStore() - graph_store[ - ("node", "rel", "node"), "coo", False, (data.num_nodes, data.num_nodes) - ] = data.edge_index - - feature_store = cugraph_pyg.data.TensorDictFeatureStore() - feature_store["node", "x"] = data.x - feature_store["node", "y"] = data.y - - return ( - (feature_store, graph_store), - split_idx, - dataset.num_features, - dataset.num_classes, - ) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--hidden_channels", type=int, default=256) - parser.add_argument("--num_layers", type=int, default=2) - parser.add_argument("--lr", type=float, default=0.001) - parser.add_argument("--epochs", type=int, default=4) - parser.add_argument("--batch_size", type=int, default=1024) - parser.add_argument("--fan_out", type=int, default=30) - parser.add_argument("--tempdir_root", type=str, default=None) - 
parser.add_argument("--dataset_root", type=str, default="dataset") - parser.add_argument("--dataset", type=str, default="ogbn-products") - parser.add_argument("--in_memory", action="store_true", default=False) - parser.add_argument("--seeds_per_call", type=int, default=-1) - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - wall_clock_start = time.perf_counter() - device = torch.device("cuda") - - data, split_idx, num_features, num_classes = load_data( - args.dataset, args.dataset_root - ) - - if "CI_RUN" in os.environ and os.environ["CI_RUN"] == "1": - warnings.warn("Pruning test dataset for CI run.") - split_idx["test"] = split_idx["test"][:1000] - - with tempfile.TemporaryDirectory(dir=args.tempdir_root) as samples_dir: - loader_kwargs = { - "data": data, - "num_neighbors": [args.fan_out] * args.num_layers, - "replace": False, - "batch_size": args.batch_size, - "samples_dir": None if args.in_memory else samples_dir, - "local_seeds_per_call": None - if args.seeds_per_call <= 0 - else args.seeds_per_call, - } - - train_loader = create_loader( - input_nodes=split_idx["train"], - stage_name="train", - **loader_kwargs, - ) - - val_loader = create_loader( - input_nodes=split_idx["valid"], - stage_name="val", - **loader_kwargs, - ) - - test_loader = create_loader( - input_nodes=split_idx["test"], - stage_name="test", - **loader_kwargs, - ) - - model = torch_geometric.nn.models.GCN( - num_features, - args.hidden_channels, - args.num_layers, - num_classes, - ).to(device) - - optimizer = torch.optim.Adam( - model.parameters(), lr=args.lr, weight_decay=0.0005 - ) - - warmup_steps = 20 - - torch.cuda.synchronize() - prep_time = round(time.perf_counter() - wall_clock_start, 2) - print("Total time before training begins (prep_time)=", prep_time, "seconds") - print("Beginning training...") - for epoch in range(1, 1 + args.epochs): - train(epoch) - val_acc = test(val_loader, val_steps=100) - print(f"Val Acc: ~{val_acc:.4f}") - - test_acc = 
test(test_loader) - print(f"Test Acc: {test_acc:.4f}") - total_time = round(time.perf_counter() - wall_clock_start, 2) - print("Total Program Runtime (total_time) =", total_time, "seconds") - print("total_time - prep_time =", total_time - prep_time, "seconds") diff --git a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py deleted file mode 100644 index 73efbc92a24..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py +++ /dev/null @@ -1,339 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Single-node, multi-GPU example. - -import argparse -import os -import tempfile -import time -import warnings - -import torch -import torch.distributed as dist -import torch.multiprocessing as mp -import torch.nn.functional as F -from ogb.nodeproppred import PygNodePropPredDataset -from torch.nn.parallel import DistributedDataParallel - -import torch_geometric - -from cugraph.gnn import ( - cugraph_comms_init, - cugraph_comms_shutdown, - cugraph_comms_create_unique_id, -) - -# Allow computation on objects that are larger than GPU memory -# https://docs.rapids.ai/api/cudf/stable/developer_guide/library_design/#spilling-to-host-memory -os.environ["CUDF_SPILL"] = "1" - -# Ensures that a CUDA context is not created on import of rapids. 
-# Allows pytorch to create the context instead -os.environ["RAPIDS_NO_INITIALIZE"] = "1" - - -def init_pytorch_worker(rank, world_size, cugraph_id): - import rmm - - rmm.reinitialize( - devices=rank, - managed_memory=True, - pool_allocator=True, - ) - - import cupy - - cupy.cuda.Device(rank).use() - from rmm.allocators.cupy import rmm_cupy_allocator - - cupy.cuda.set_allocator(rmm_cupy_allocator) - - from cugraph.testing.mg_utils import enable_spilling - - enable_spilling() - - torch.cuda.set_device(rank) - - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "12355" - dist.init_process_group("nccl", rank=rank, world_size=world_size) - - cugraph_comms_init(rank=rank, world_size=world_size, uid=cugraph_id, device=rank) - - -def run_train( - rank, - data, - world_size, - cugraph_id, - model, - epochs, - batch_size, - fan_out, - split_idx, - num_classes, - wall_clock_start, - tempdir=None, - num_layers=3, - in_memory=False, - seeds_per_call=-1, -): - - init_pytorch_worker( - rank, - world_size, - cugraph_id, - ) - - model = model.to(rank) - model = DistributedDataParallel(model, device_ids=[rank]) - optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0005) - - kwargs = dict( - num_neighbors=[fan_out] * num_layers, - batch_size=batch_size, - ) - # Set Up Neighbor Loading - from cugraph_pyg.data import GraphStore, TensorDictFeatureStore - from cugraph_pyg.loader import NeighborLoader - - graph_store = GraphStore(is_multi_gpu=True) - ixr = torch.tensor_split(data.edge_index, world_size, dim=1)[rank] - graph_store[ - ("node", "rel", "node"), "coo", False, (data.num_nodes, data.num_nodes) - ] = ixr - - feature_store = TensorDictFeatureStore() - feature_store["node", "x"] = data.x - feature_store["node", "y"] = data.y - - dist.barrier() - - ix_train = torch.tensor_split(split_idx["train"], world_size)[rank].cuda() - train_path = None if in_memory else os.path.join(tempdir, f"train_{rank}") - if train_path: - os.mkdir(train_path) - 
train_loader = NeighborLoader( - (feature_store, graph_store), - input_nodes=ix_train, - directory=train_path, - shuffle=True, - drop_last=True, - local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None, - **kwargs, - ) - - ix_test = torch.tensor_split(split_idx["test"], world_size)[rank].cuda() - test_path = None if in_memory else os.path.join(tempdir, f"test_{rank}") - if test_path: - os.mkdir(test_path) - test_loader = NeighborLoader( - (feature_store, graph_store), - input_nodes=ix_test, - directory=test_path, - shuffle=True, - drop_last=True, - local_seeds_per_call=80000, - **kwargs, - ) - - ix_valid = torch.tensor_split(split_idx["valid"], world_size)[rank].cuda() - valid_path = None if in_memory else os.path.join(tempdir, f"valid_{rank}") - if valid_path: - os.mkdir(valid_path) - valid_loader = NeighborLoader( - (feature_store, graph_store), - input_nodes=ix_valid, - directory=valid_path, - shuffle=True, - drop_last=True, - local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None, - **kwargs, - ) - - dist.barrier() - - eval_steps = 1000 - warmup_steps = 20 - dist.barrier() - torch.cuda.synchronize() - - if rank == 0: - prep_time = round(time.perf_counter() - wall_clock_start, 2) - print("Total time before training begins (prep_time) =", prep_time, "seconds") - print("Beginning training...") - for epoch in range(epochs): - for i, batch in enumerate(train_loader): - if i == warmup_steps: - torch.cuda.synchronize() - start = time.time() - - batch = batch.to(rank) - batch_size = batch.batch_size - - batch.y = batch.y.to(torch.long) - optimizer.zero_grad() - out = model(batch.x, batch.edge_index) - loss = F.cross_entropy(out[:batch_size], batch.y[:batch_size]) - loss.backward() - optimizer.step() - if rank == 0 and i % 10 == 0: - print( - "Epoch: " - + str(epoch) - + ", Iteration: " - + str(i) - + ", Loss: " - + str(loss) - ) - nb = i + 1.0 - - if rank == 0: - print( - "Average Training Iteration Time:", - (time.time() - start) / (nb - 
warmup_steps), - "s/iter", - ) - - with torch.no_grad(): - total_correct = total_examples = 0 - for i, batch in enumerate(valid_loader): - if i >= eval_steps: - break - - batch = batch.to(rank) - batch_size = batch.batch_size - - batch.y = batch.y.to(torch.long) - out = model(batch.x, batch.edge_index)[:batch_size] - - pred = out.argmax(dim=-1) - y = batch.y[:batch_size].view(-1).to(torch.long) - - total_correct += int((pred == y).sum()) - total_examples += y.size(0) - - acc_val = total_correct / total_examples - if rank == 0: - print( - f"Validation Accuracy: {acc_val * 100.0:.4f}%", - ) - - torch.cuda.synchronize() - - with torch.no_grad(): - total_correct = total_examples = 0 - for i, batch in enumerate(test_loader): - batch = batch.to(rank) - batch_size = batch.batch_size - - batch.y = batch.y.to(torch.long) - out = model(batch.x, batch.edge_index)[:batch_size] - - pred = out.argmax(dim=-1) - y = batch.y[:batch_size].view(-1).to(torch.long) - - total_correct += int((pred == y).sum()) - total_examples += y.size(0) - - acc_test = total_correct / total_examples - if rank == 0: - print( - f"Test Accuracy: {acc_test * 100.0:.4f}%", - ) - - if rank == 0: - total_time = round(time.perf_counter() - wall_clock_start, 2) - print("Total Program Runtime (total_time) =", total_time, "seconds") - print("total_time - prep_time =", total_time - prep_time, "seconds") - - cugraph_comms_shutdown() - dist.destroy_process_group() - - -if __name__ == "__main__": - if "CI_RUN" in os.environ and os.environ["CI_RUN"] == "1": - warnings.warn("Skipping SMNG example in CI due to memory limit") - else: - parser = argparse.ArgumentParser() - parser.add_argument("--hidden_channels", type=int, default=256) - parser.add_argument("--num_layers", type=int, default=2) - parser.add_argument("--lr", type=float, default=0.001) - parser.add_argument("--epochs", type=int, default=4) - parser.add_argument("--batch_size", type=int, default=1024) - parser.add_argument("--fan_out", type=int, default=30) - 
parser.add_argument("--tempdir_root", type=str, default=None) - parser.add_argument("--dataset_root", type=str, default="dataset") - parser.add_argument("--dataset", type=str, default="ogbn-products") - parser.add_argument("--in_memory", action="store_true", default=False) - parser.add_argument("--seeds_per_call", type=int, default=-1) - - parser.add_argument( - "--n_devices", - type=int, - default=-1, - help="1-8 to use that many GPUs. Defaults to all available GPUs", - ) - - args = parser.parse_args() - wall_clock_start = time.perf_counter() - - from rmm.allocators.torch import rmm_torch_allocator - - torch.cuda.memory.change_current_allocator(rmm_torch_allocator) - - dataset = PygNodePropPredDataset(name=args.dataset, root=args.dataset_root) - split_idx = dataset.get_idx_split() - data = dataset[0] - data.y = data.y.reshape(-1) - - model = torch_geometric.nn.models.GCN( - dataset.num_features, - args.hidden_channels, - args.num_layers, - dataset.num_classes, - ) - - print("Data =", data) - if args.n_devices == -1: - world_size = torch.cuda.device_count() - else: - world_size = args.n_devices - print("Using", world_size, "GPUs...") - - # Create the uid needed for cuGraph comms - cugraph_id = cugraph_comms_create_unique_id() - - with tempfile.TemporaryDirectory(dir=args.tempdir_root) as tempdir: - mp.spawn( - run_train, - args=( - data, - world_size, - cugraph_id, - model, - args.epochs, - args.batch_size, - args.fan_out, - split_idx, - dataset.num_classes, - wall_clock_start, - tempdir, - args.num_layers, - args.in_memory, - args.seeds_per_call, - ), - nprocs=world_size, - join=True, - ) diff --git a/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_mg.py b/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_mg.py deleted file mode 100644 index 145675c8a06..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_mg.py +++ /dev/null @@ -1,446 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# For this script, dask must be started first in a separate process. -# To do this, the `start_dask.sh` script has been provided. This scripts starts -# a dask scheduler and dask workers. To select the GPUs and amount of memory -# allocated to dask per GPU, the `CUDA_VISIBLE_DEVICES` and `WORKER_RMM_POOL_SIZE` -# arguments in that script can be modified. -# To connect to dask, the scheduler JSON file must be provided. This can be done -# using the `--dask_scheduler_file` argument in the mg python script being run. 
- -from ogb.nodeproppred import NodePropPredDataset - -import time -import argparse -import gc -import warnings - -import torch -import numpy as np - -from cugraph_pyg.nn import SAGEConv as CuGraphSAGEConv - -import torch.nn as nn -import torch.nn.functional as F - -import torch.distributed as td -import torch.multiprocessing as tmp -from torch.nn.parallel import DistributedDataParallel as ddp - -from typing import List - - -class CuGraphSAGE(nn.Module): - def __init__(self, in_channels, hidden_channels, out_channels, num_layers): - super().__init__() - - self.convs = torch.nn.ModuleList() - self.convs.append(CuGraphSAGEConv(in_channels, hidden_channels)) - for _ in range(num_layers - 1): - conv = CuGraphSAGEConv(hidden_channels, hidden_channels) - self.convs.append(conv) - - self.lin = nn.Linear(hidden_channels, out_channels) - - def forward(self, x, edge, size): - edge_csc = CuGraphSAGEConv.to_csc(edge, (size[0], size[0])) - for conv in self.convs: - x = conv(x, edge_csc)[: size[1]] - x = F.relu(x) - x = F.dropout(x, p=0.5) - - return self.lin(x) - - -def enable_cudf_spilling(): - import cudf - - cudf.set_option("spill", True) - - -def init_pytorch_worker(rank, devices, manager_ip, manager_port) -> None: - import cupy - import rmm - - device_id = devices[rank] - - rmm.reinitialize( - devices=[device_id], - pool_allocator=False, - ) - - # torch.cuda.change_current_allocator(rmm.rmm_torch_allocator) - # cupy.cuda.set_allocator(rmm.rmm_cupy_allocator) - - cupy.cuda.Device(device_id).use() - torch.cuda.set_device(device_id) - - # Pytorch training worker initialization - dist_init_method = f"tcp://{manager_ip}:{manager_port}" - - torch.distributed.init_process_group( - backend="nccl", - init_method=dist_init_method, - world_size=len(devices), - rank=rank, - ) - - # enable_cudf_spilling() - - -def start_cugraph_dask_client(rank, dask_scheduler_file): - print( - "Connecting to dask... 
" - "(warning: this may take a while depending on your configuration)" - ) - start_time_connect_dask = time.perf_counter_ns() - from distributed import Client - from cugraph.dask.comms import comms as Comms - - client = Client(scheduler_file=dask_scheduler_file) - Comms.initialize(p2p=True) - - end_time_connect_dask = time.perf_counter_ns() - print( - f"Successfully connected to dask on rank {rank}, took " - f"{(end_time_connect_dask - start_time_connect_dask) / 1e9:3.4f} s" - ) - return client - - -def stop_cugraph_dask_client(): - from cugraph.dask.comms import comms as Comms - - Comms.destroy() - - from dask.distributed import get_client - - get_client().close() - - -def train( - rank, - torch_devices: List[int], - manager_ip: str, - manager_port: int, - dask_scheduler_file: str, - num_epochs: int, - features_on_gpu=True, -) -> None: - """ - Parameters - ---------- - device: int - The CUDA device where the model, graph data, and node labels will be stored. - features_on_gpu: bool - Whether to store a replica of features on each worker's GPU. If False, - all features will be stored on the CPU. 
- """ - - start_time_preprocess = time.perf_counter_ns() - - world_size = len(torch_devices) - device_id = torch_devices[rank] - features_device = device_id if features_on_gpu else "cpu" - init_pytorch_worker(rank, torch_devices, manager_ip, manager_port) - td.barrier() - - client = start_cugraph_dask_client(rank, dask_scheduler_file) - - from distributed import Event as Dask_Event - - event = Dask_Event("cugraph_store_creation_event") - download_event = Dask_Event("dataset_download_event") - - td.barrier() - - import cugraph - from cugraph_pyg.data import DaskGraphStore - from cugraph_pyg.loader import DaskNeighborLoader - - if rank == 0: - print("Rank 0 downloading dataset") - dataset = NodePropPredDataset(name="ogbn-mag") - data = dataset[0] - download_event.set() - print("Dataset downloaded") - else: - if download_event.wait(timeout=1000): - print(f"Rank {rank} loading dataset") - dataset = NodePropPredDataset(name="ogbn-mag") - data = dataset[0] - print(f"Rank {rank} loaded dataset successfully") - - ei = data[0]["edge_index_dict"][("paper", "cites", "paper")] - G = { - ("paper", "cites", "paper"): np.stack( - [np.concatenate([ei[0], ei[1]]), np.concatenate([ei[1], ei[0]])] - ) - } - N = {"paper": data[0]["num_nodes_dict"]["paper"]} - - fs = cugraph.gnn.FeatureStore(backend="torch") - - fs.add_data( - torch.as_tensor(data[0]["node_feat_dict"]["paper"], device=features_device), - "paper", - "x", - ) - - fs.add_data(torch.as_tensor(data[1]["paper"].T[0], device=device_id), "paper", "y") - - num_papers = data[0]["num_nodes_dict"]["paper"] - - if rank == 0: - train_perc = 0.1 - all_train_nodes = torch.randperm(num_papers) - all_train_nodes = all_train_nodes[: int(train_perc * num_papers)] - train_nodes = all_train_nodes[: int(len(all_train_nodes) / world_size)] - - train_mask = torch.full((num_papers,), -1, device=device_id) - train_mask[train_nodes] = 1 - fs.add_data(train_mask, "paper", "train") - - print(f"Rank {rank} finished loading graph and feature data") - 
- if rank == 0: - print("Rank 0 creating its cugraph store and initializing distributed graph") - # Rank 0 will initialize the distributed cugraph graph. - cugraph_store_create_start = time.perf_counter_ns() - print("G:", G[("paper", "cites", "paper")].shape) - cugraph_store = DaskGraphStore(fs, G, N, multi_gpu=True) - cugraph_store_create_end = time.perf_counter_ns() - print( - "cuGraph Store created on rank 0 in " - f"{(cugraph_store_create_end - cugraph_store_create_start) / 1e9:3.4f} s" - ) - client.publish_dataset(train_nodes=all_train_nodes) - event.set() - print("Rank 0 done with cugraph store creation") - else: - if event.wait(timeout=1000): - print(f"Rank {rank} creating cugraph store") - train_nodes = client.get_dataset("train_nodes") - train_nodes = train_nodes[ - int(rank * len(train_nodes) / world_size) : int( - (rank + 1) * len(train_nodes) / world_size - ) - ] - - train_mask = torch.full((num_papers,), -1, device=device_id) - train_mask[train_nodes] = 1 - fs.add_data(train_mask, "paper", "train") - - # Will automatically use the stored distributed cugraph graph on rank 0. 
- cugraph_store_create_start = time.perf_counter_ns() - cugraph_store = DaskGraphStore(fs, G, N, multi_gpu=True) - cugraph_store_create_end = time.perf_counter_ns() - print( - f"Rank {rank} created cugraph store in " - f"{(cugraph_store_create_end - cugraph_store_create_start) / 1e9:3.4f}" - " s" - ) - print(f"Rank {rank} done with cugraph store creation") - - end_time_preprocess = time.perf_counter_ns() - print(f"rank {rank}: train {train_nodes.shape}", flush=True) - print( - f"rank {rank}: all preprocessing took" - f" {(end_time_preprocess - start_time_preprocess) / 1e9:3.4f}", - flush=True, - ) - td.barrier() - model = ( - CuGraphSAGE(in_channels=128, hidden_channels=64, out_channels=349, num_layers=3) - .to(torch.float32) - .to(device_id) - ) - model = ddp(model, device_ids=[device_id], output_device=device_id) - td.barrier() - - optimizer = torch.optim.Adam(model.parameters(), lr=0.01) - - for epoch in range(num_epochs): - start_time_train = time.perf_counter_ns() - model.train() - - start_time_loader = time.perf_counter_ns() - cugraph_bulk_loader = DaskNeighborLoader( - cugraph_store, - train_nodes, - batch_size=250, - num_neighbors=[10, 10, 10], - seeds_per_call=1000, - batches_per_partition=2, - replace=False, - ) - end_time_loader = time.perf_counter_ns() - total_time_loader = (end_time_loader - start_time_loader) / 1e9 - - total_loss = 0 - num_batches = 0 - - print(f"rank {rank} starting epoch {epoch}") - with td.algorithms.join.Join([model]): - total_time_sample = 0 - total_time_forward = 0 - total_time_backward = 0 - - start_time_sample = time.perf_counter_ns() - for iter_i, hetero_data in enumerate(cugraph_bulk_loader): - end_time_sample = time.perf_counter_ns() - total_time_sample += (end_time_sample - start_time_sample) / 1e9 - num_batches += 1 - - if iter_i % 20 == 0: - print(f"iteration {iter_i}") - - # train - train_mask = hetero_data.train_dict["paper"] - y_true = hetero_data.y_dict["paper"] - - start_time_forward = time.perf_counter_ns() - 
y_pred = model( - hetero_data.x_dict["paper"].to(device_id).to(torch.float32), - hetero_data.edge_index_dict[("paper", "cites", "paper")].to( - device_id - ), - (len(y_true), len(y_true)), - ) - end_time_forward = time.perf_counter_ns() - total_time_forward += (end_time_forward - start_time_forward) / 1e9 - - y_true = F.one_hot( - y_true[train_mask].to(torch.int64), num_classes=349 - ).to(torch.float32) - - y_pred = y_pred[train_mask] - - loss = F.cross_entropy(y_pred, y_true) - - start_time_backward = time.perf_counter_ns() - optimizer.zero_grad() - loss.backward() - optimizer.step() - end_time_backward = time.perf_counter_ns() - total_time_backward += (end_time_backward - start_time_backward) / 1e9 - - total_loss += loss.item() - - del y_true - del y_pred - del loss - del hetero_data - gc.collect() - - start_time_sample = time.perf_counter_ns() - - end_time_train = time.perf_counter_ns() - print( - f"epoch {epoch} " - f"total time: {(end_time_train - start_time_train) / 1e9:3.4f} s" - f"\nloader create time per batch: {total_time_loader / num_batches} s" - f"\nsampling/load time per batch: {total_time_sample / num_batches} s" - f"\nforward time per batch: {total_time_forward / num_batches} s" - f"\nbackward time per batch: {total_time_backward / num_batches} s" - f"\nnum batches: {num_batches}" - ) - print(f"loss after epoch {epoch}: {total_loss / num_batches}") - - td.barrier() - if rank == 0: - print("DONE", flush=True) - client.unpublish_dataset("train_nodes") - event.clear() - - td.destroy_process_group() - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--torch_devices", - type=str, - default="0,1", - help="GPU to allocate to pytorch for model, graph data, and node label storage", - required=False, - ) - - parser.add_argument( - "--num_epochs", - type=int, - default=1, - help="Number of training epochs", - required=False, - ) - - parser.add_argument( - "--features_on_gpu", - type=bool, - default=True, - help="Whether to 
store the features on each worker's GPU", - required=False, - ) - - parser.add_argument( - "--torch_manager_ip", - type=str, - default="127.0.0.1", - help="The torch distributed manager ip address", - required=False, - ) - - parser.add_argument( - "--torch_manager_port", - type=str, - default="12346", - help="The torch distributed manager port", - required=False, - ) - - parser.add_argument( - "--dask_scheduler_file", - type=str, - help="The path to the dask scheduler file", - required=False, - default=None, - ) - - return parser.parse_args() - - -def main(): - args = parse_args() - if args.dask_scheduler_file is None: - warnings.warn( - "You must provide the dask scheduler file " "to run this example. Exiting." - ) - - else: - torch_devices = [int(d) for d in args.torch_devices.split(",")] - - train_args = ( - torch_devices, - args.torch_manager_ip, - args.torch_manager_port, - args.dask_scheduler_file, - args.num_epochs, - args.features_on_gpu, - ) - - tmp.spawn(train, args=train_args, nprocs=len(torch_devices)) - - -if __name__ == "__main__": - main() diff --git a/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_sg.py b/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_sg.py deleted file mode 100644 index e0169ee2c25..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/examples/graph_sage_sg.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import time -import argparse -import gc - -import torch - -from cugraph_pyg.nn import SAGEConv as CuGraphSAGEConv - -import torch.nn as nn -import torch.nn.functional as F - -from typing import Union - - -class CuGraphSAGE(nn.Module): - def __init__(self, in_channels, hidden_channels, out_channels, num_layers): - super().__init__() - - self.convs = torch.nn.ModuleList() - self.convs.append(CuGraphSAGEConv(in_channels, hidden_channels)) - for _ in range(num_layers - 1): - conv = CuGraphSAGEConv(hidden_channels, hidden_channels) - self.convs.append(conv) - - self.lin = nn.Linear(hidden_channels, out_channels) - - def forward(self, x, edge, size): - edge_csc = CuGraphSAGEConv.to_csc(edge, (size[0], size[0])) - for conv in self.convs: - x = conv(x, edge_csc)[: size[1]] - x = F.relu(x) - x = F.dropout(x, p=0.5) - - return self.lin(x) - - -def init_pytorch_worker(device_id: int) -> None: - import cupy - import rmm - - rmm.reinitialize( - devices=[device_id], - pool_allocator=False, - ) - - cupy.cuda.Device(device_id).use() - torch.cuda.set_device(device_id) - - -def train(device: int, features_device: Union[str, int] = "cpu", num_epochs=2) -> None: - """ - Parameters - ---------- - device: int - The CUDA device where the model, graph data, and node labels will be stored. - features_device: Union[str, int] - The device (CUDA device or CPU) where features will be stored. 
- """ - - init_pytorch_worker(device) - - import cugraph - from cugraph_pyg.data import DaskGraphStore - from cugraph_pyg.loader import DaskNeighborLoader - - from ogb.nodeproppred import NodePropPredDataset - - dataset = NodePropPredDataset(name="ogbn-mag") - data = dataset[0] - - G = data[0]["edge_index_dict"] - N = data[0]["num_nodes_dict"] - - fs = cugraph.gnn.FeatureStore(backend="torch") - - fs.add_data( - torch.as_tensor(data[0]["node_feat_dict"]["paper"], device=features_device), - "paper", - "x", - ) - - fs.add_data(torch.as_tensor(data[1]["paper"].T[0], device=device), "paper", "y") - - num_papers = data[0]["num_nodes_dict"]["paper"] - train_perc = 0.1 - - train_nodes = torch.randperm(num_papers) - train_nodes = train_nodes[: int(train_perc * num_papers)] - - train_mask = torch.full((num_papers,), -1, device=device) - train_mask[train_nodes] = 1 - - fs.add_data(train_mask, "paper", "train") - - cugraph_store = DaskGraphStore(fs, G, N) - - model = ( - CuGraphSAGE(in_channels=128, hidden_channels=64, out_channels=349, num_layers=3) - .to(torch.float32) - .to(device) - ) - - optimizer = torch.optim.Adam(model.parameters(), lr=0.01) - - for epoch in range(num_epochs): - start_time_train = time.perf_counter_ns() - model.train() - - cugraph_bulk_loader = DaskNeighborLoader( - cugraph_store, train_nodes, batch_size=500, num_neighbors=[10, 25] - ) - - total_loss = 0 - num_batches = 0 - - # This context manager will handle different # batches per rank - # barrier() cannot do this since the number of ops per rank is - # different. It essentially acts like barrier would if the - # number of ops per rank was the same. 
- for iter_i, hetero_data in enumerate(cugraph_bulk_loader): - num_batches += 1 - if iter_i % 20 == 0: - print(f"iteration {iter_i}") - - # train - train_mask = hetero_data.train_dict["paper"] - y_true = hetero_data.y_dict["paper"] - - y_pred = model( - hetero_data.x_dict["paper"].to(device).to(torch.float32), - hetero_data.edge_index_dict[("paper", "cites", "paper")].to(device), - (len(y_true), len(y_true)), - ) - - y_true = F.one_hot(y_true[train_mask].to(torch.int64), num_classes=349).to( - torch.float32 - ) - - y_pred = y_pred[train_mask] - - loss = F.cross_entropy(y_pred, y_true) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - total_loss += loss.item() - - del y_true - del y_pred - del loss - del hetero_data - gc.collect() - - end_time_train = time.perf_counter_ns() - print( - f"epoch {epoch} time: " - f"{(end_time_train - start_time_train) / 1e9:3.4f} s" - ) - print(f"loss after epoch {epoch}: {total_loss / num_batches}") - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--device", - type=int, - default=0, - help="GPU to allocate to pytorch for model, graph data, and node label storage", - required=False, - ) - - parser.add_argument( - "--features_device", - type=str, - default="0", - help="Device to allocate to pytorch for feature storage", - required=False, - ) - - parser.add_argument( - "--num_epochs", - type=int, - default=1, - help="Number of training epochs", - required=False, - ) - - return parser.parse_args() - - -def main(): - args = parse_args() - - try: - features_device = int(args.features_device) - except ValueError: - features_device = args.features_device - - train(args.device, features_device, args.num_epochs) - - -if __name__ == "__main__": - main() diff --git a/python/cugraph-pyg/cugraph_pyg/examples/pylibcugraph_mg.py b/python/cugraph-pyg/cugraph_pyg/examples/pylibcugraph_mg.py deleted file mode 100644 index 832c5ec74f0..00000000000 --- 
a/python/cugraph-pyg/cugraph_pyg/examples/pylibcugraph_mg.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This example shows how to use cuGraph nccl-only comms, pylibcuGraph, -# and PyTorch DDP to run a multi-GPU workflow. Most users of the -# GNN packages will not interact with cuGraph directly. This example -# is intented for users who want to extend cuGraph within a DDP workflow. - -import os - -import pandas -import numpy as np -import torch -import torch.multiprocessing as tmp -import torch.distributed as dist - -import cudf - -from cugraph.gnn import ( - cugraph_comms_init, - cugraph_comms_shutdown, - cugraph_comms_create_unique_id, - cugraph_comms_get_raft_handle, -) - -from pylibcugraph import MGGraph, ResourceHandle, GraphProperties, degrees - -from ogb.nodeproppred import NodePropPredDataset - - -def init_pytorch(rank, world_size): - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "12355" - dist.init_process_group("nccl", rank=rank, world_size=world_size) - - -def calc_degree(rank: int, world_size: int, uid, edgelist): - init_pytorch(rank, world_size) - - device = rank - cugraph_comms_init(rank, world_size, uid, device) - - print(f"rank {rank} initialized cugraph") - - src = cudf.Series(np.array_split(edgelist[0], world_size)[rank]) - dst = cudf.Series(np.array_split(edgelist[1], world_size)[rank]) - - seeds = cudf.Series(np.arange(rank * 50, (rank + 1) * 
50)) - handle = ResourceHandle(cugraph_comms_get_raft_handle().getHandle()) - - print("constructing graph") - G = MGGraph( - handle, - GraphProperties(is_multigraph=True, is_symmetric=False), - [src], - [dst], - ) - print("graph constructed") - - print("calculating degrees") - vertices, in_deg, out_deg = degrees(handle, G, seeds, do_expensive_check=False) - print("degrees calculated") - - print("constructing dataframe") - df = pandas.DataFrame( - {"v": vertices.get(), "in": in_deg.get(), "out": out_deg.get()} - ) - print(df) - - dist.barrier() - cugraph_comms_shutdown() - print(f"rank {rank} shut down cugraph") - - -def main(): - world_size = torch.cuda.device_count() - uid = cugraph_comms_create_unique_id() - - dataset = NodePropPredDataset("ogbn-products") - el = dataset[0][0]["edge_index"].astype("int64") - - tmp.spawn( - calc_degree, - args=(world_size, uid, el), - nprocs=world_size, - ) - - -if __name__ == "__main__": - main() diff --git a/python/cugraph-pyg/cugraph_pyg/examples/pylibcugraph_sg.py b/python/cugraph-pyg/cugraph_pyg/examples/pylibcugraph_sg.py deleted file mode 100644 index 2f273ee581e..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/examples/pylibcugraph_sg.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This example shows how to use cuGraph and pylibcuGraph to run a -# single-GPU workflow. Most users of the GNN packages will not interact -# with cuGraph directly. 
This example is intented for users who want -# to extend cuGraph within a PyTorch workflow. - -import pandas -import numpy as np - -import cudf - -from pylibcugraph import SGGraph, ResourceHandle, GraphProperties, degrees - -from ogb.nodeproppred import NodePropPredDataset - - -def calc_degree(edgelist): - src = cudf.Series(edgelist[0]) - dst = cudf.Series(edgelist[1]) - - seeds = cudf.Series(np.arange(256)) - - print("constructing graph") - G = SGGraph( - ResourceHandle(), - GraphProperties(is_multigraph=True, is_symmetric=False), - src, - dst, - ) - print("graph constructed") - - print("calculating degrees") - vertices, in_deg, out_deg = degrees( - ResourceHandle(), G, seeds, do_expensive_check=False - ) - print("degrees calculated") - - print("constructing dataframe") - df = pandas.DataFrame( - {"v": vertices.get(), "in": in_deg.get(), "out": out_deg.get()} - ) - print(df) - - print("done") - - -def main(): - dataset = NodePropPredDataset("ogbn-products") - el = dataset[0][0]["edge_index"].astype("int64") - calc_degree(el) - - -if __name__ == "__main__": - main() diff --git a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_mnmg.py b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_mnmg.py deleted file mode 100644 index 5c75e01e6f5..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_mnmg.py +++ /dev/null @@ -1,418 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# This example illustrates link classification using the ogbl-wikikg2 dataset. - -import os -import json -import argparse -import warnings - -import torch - -import torch.nn.functional as F -from torch.nn import Parameter -from torch_geometric.nn import FastRGCNConv, GAE -from torch.nn.parallel import DistributedDataParallel - -from ogb.linkproppred import PygLinkPropPredDataset - -import cugraph_pyg - -from cugraph.gnn import ( - cugraph_comms_init, - cugraph_comms_create_unique_id, - cugraph_comms_shutdown, -) - -from pylibwholegraph.torch.initialize import ( - init as wm_init, - finalize as wm_finalize, -) - - -# Enable cudf spilling to save gpu memory -from cugraph.testing.mg_utils import enable_spilling - -# Ensures that a CUDA context is not created on import of rapids. -# Allows pytorch to create the context instead -os.environ["RAPIDS_NO_INITIALIZE"] = "1" - - -def init_pytorch_worker(global_rank, local_rank, world_size, uid): - import rmm - - rmm.reinitialize(devices=[local_rank], pool_allocator=True, managed_memory=True) - - import cupy - from rmm.allocators.cupy import rmm_cupy_allocator - - cupy.cuda.set_allocator(rmm_cupy_allocator) - - cugraph_comms_init( - global_rank, - world_size, - uid, - local_rank, - ) - - wm_init(global_rank, world_size, local_rank, torch.cuda.device_count()) - - enable_spilling() - - -class RGCNEncoder(torch.nn.Module): - def __init__(self, num_nodes, hidden_channels, num_relations, num_bases=30): - super().__init__() - self.node_emb = Parameter(torch.empty(num_nodes, hidden_channels)) - self.conv1 = FastRGCNConv( - hidden_channels, hidden_channels, num_relations, num_bases=num_bases - ) - self.conv2 = FastRGCNConv( - hidden_channels, hidden_channels, num_relations, num_bases=num_bases - ) - self.reset_parameters() - - def reset_parameters(self): - torch.nn.init.xavier_uniform_(self.node_emb) - self.conv1.reset_parameters() - self.conv2.reset_parameters() - - def forward(self, edge_index, edge_type): - x = self.node_emb - x 
= self.conv1(x, edge_index, edge_type).relu_() - x = F.dropout(x, p=0.2, training=self.training) - x = self.conv2(x, edge_index, edge_type) - return x - - -def train(epoch, model, optimizer, train_loader, edge_feature_store, num_steps=None): - model.train() - optimizer.zero_grad() - - for i, batch in enumerate(train_loader): - r = edge_feature_store[("n", "e", "n"), "rel"][batch.e_id].flatten().cuda() - z = model.encode(batch.edge_index, r) - - loss = model.recon_loss(z, batch.edge_index) - loss.backward() - optimizer.step() - - if i % 10 == 0: - print( - f"Epoch: {epoch:02d}, Iteration: {i:02d}, Loss: {loss:.4f}", flush=True - ) - if num_steps and i == num_steps: - break - - -def test(stage, epoch, model, loader, num_steps=None): - # TODO support ROC-AUC metric - # Predict probabilities of future edges - model.eval() - - rr = 0.0 - for i, (h, h_neg, t, t_neg, r) in enumerate(loader): - if num_steps and i >= num_steps: - break - - ei = torch.concatenate( - [ - torch.stack([h, t]).cuda(), - torch.stack([h_neg.flatten(), t_neg.flatten()]).cuda(), - ], - dim=-1, - ) - - r = torch.concatenate([r, torch.repeat_interleave(r, h_neg.shape[-1])]).cuda() - - z = model.encode(ei, r) - q = model.decode(z, ei) - - _, ix = torch.sort(q, descending=True) - rr += 1.0 / (1.0 + ix[0]) - - print(f"epoch {epoch:02d} {stage} mrr:", rr / i, flush=True) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--hidden_channels", type=int, default=128) - parser.add_argument("--num_layers", type=int, default=1) - parser.add_argument("--lr", type=float, default=0.001) - parser.add_argument("--epochs", type=int, default=4) - parser.add_argument("--batch_size", type=int, default=16384) - parser.add_argument("--num_neg", type=int, default=500) - parser.add_argument("--num_pos", type=int, default=-1) - parser.add_argument("--fan_out", type=int, default=10) - parser.add_argument("--dataset", type=str, default="ogbl-wikikg2") - parser.add_argument("--dataset_root", 
type=str, default="dataset") - parser.add_argument("--seeds_per_call", type=int, default=-1) - parser.add_argument("--n_devices", type=int, default=-1) - parser.add_argument("--skip_partition", action="store_true") - - return parser.parse_args() - - -def run_train(rank, world_size, model, data, edge_feature_store, meta, splits, args): - model = model.to(rank) - model = GAE(DistributedDataParallel(model, device_ids=[rank])) - optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) - - eli = torch.stack([splits["train"]["head"], splits["train"]["tail"]]) - - train_loader = cugraph_pyg.loader.LinkNeighborLoader( - data, - [args.fan_out] * args.num_layers, - edge_label_index=eli, - local_seeds_per_call=args.seeds_per_call if args.seeds_per_call > 0 else None, - batch_size=args.batch_size, - shuffle=True, - drop_last=True, - ) - - def get_eval_loader(stage: str): - head = splits[stage]["head"] - tail = splits[stage]["tail"] - - head_neg = splits[stage]["head_neg"][:, : args.num_neg] - tail_neg = splits[stage]["tail_neg"][:, : args.num_neg] - - rel = splits[stage]["relation"] - - return torch.utils.data.DataLoader( - torch.utils.data.TensorDataset( - head.pin_memory(), - head_neg.pin_memory(), - tail.pin_memory(), - tail_neg.pin_memory(), - rel.pin_memory(), - ), - batch_size=1, - shuffle=False, - drop_last=True, - ) - - test_loader = get_eval_loader("test") - valid_loader = get_eval_loader("valid") - - num_train_steps = (args.num_pos // args.batch_size) if args.num_pos > 0 else 100 - - for epoch in range(1, 1 + args.epochs): - train( - epoch, - model, - optimizer, - train_loader, - edge_feature_store, - num_steps=num_train_steps, - ) - test("validation", epoch, model, valid_loader, num_steps=1024) - - test("test", epoch, model, test_loader, num_steps=1024) - - wm_finalize() - cugraph_comms_shutdown() - - -def partition_data( - data, splits, meta, edge_path, rel_path, pos_path, neg_path, meta_path -): - # Split and save edge index - os.makedirs( - edge_path, - 
exist_ok=True, - ) - for (r, e) in enumerate(torch.tensor_split(data.edge_index, world_size, dim=1)): - rank_path = os.path.join(edge_path, f"rank={r}.pt") - torch.save( - e.clone(), - rank_path, - ) - - # Split and save edge reltypes - os.makedirs( - rel_path, - exist_ok=True, - ) - for (r, f) in enumerate(torch.tensor_split(data.edge_reltype, world_size)): - rank_path = os.path.join(rel_path, f"rank={r}.pt") - torch.save( - f.clone(), - rank_path, - ) - - # Split and save positive edges - os.makedirs( - pos_path, - exist_ok=True, - ) - for stage in ["train", "test", "valid"]: - for (r, n) in enumerate( - torch.tensor_split( - torch.stack([splits[stage]["head"], splits[stage]["tail"]]), - world_size, - dim=-1, - ) - ): - rank_path = os.path.join(pos_path, f"rank={r}_{stage}.pt") - torch.save( - n.clone(), - rank_path, - ) - - # Split and save negative edges - os.makedirs( - neg_path, - exist_ok=True, - ) - for stage in ["test", "valid"]: - for (r, n) in enumerate( - torch.tensor_split( - torch.stack([splits[stage]["head_neg"], splits[stage]["tail_neg"]]), - world_size, - dim=1, - ) - ): - rank_path = os.path.join(neg_path, f"rank={r}_{stage}.pt") - torch.save(n.clone(), rank_path) - for (r, n) in enumerate( - torch.tensor_split(splits[stage]["relation"], world_size, dim=-1) - ): - print(n) - rank_path = os.path.join(neg_path, f"rank={r}_{stage}_relation.pt") - torch.save(n.clone(), rank_path) - - with open(meta_path, "w") as f: - json.dump(meta, f) - - -def load_partitioned_data(rank, edge_path, rel_path, pos_path, neg_path, meta_path): - from cugraph_pyg.data import GraphStore, WholeFeatureStore, TensorDictFeatureStore - - graph_store = GraphStore() - feature_store = TensorDictFeatureStore() - edge_feature_store = WholeFeatureStore() - - # Load edge index - graph_store[("n", "e", "n"), "coo"] = torch.load( - os.path.join(edge_path, f"rank={rank}.pt") - ) - - # Load edge rel type - edge_feature_store[("n", "e", "n"), "rel"] = torch.load( - os.path.join(rel_path, 
f"rank={rank}.pt") - ) - - splits = {} - - # Load positive edges - for stage in ["train", "test", "valid"]: - head, tail = torch.load(os.path.join(pos_path, f"rank={rank}_{stage}.pt")) - splits[stage] = { - "head": head, - "tail": tail, - } - - # Load negative edges - for stage in ["test", "valid"]: - head_neg, tail_neg = torch.load( - os.path.join(neg_path, f"rank={rank}_{stage}.pt") - ) - relation = torch.load( - os.path.join(neg_path, f"rank={rank}_{stage}_relation.pt") - ) - splits[stage]["head_neg"] = head_neg - splits[stage]["tail_neg"] = tail_neg - splits[stage]["relation"] = relation - - with open(meta_path, "r") as f: - meta = json.load(f) - - return (feature_store, graph_store), edge_feature_store, splits, meta - - -if __name__ == "__main__": - args = parse_args() - - if "LOCAL_RANK" in os.environ: - torch.distributed.init_process_group("nccl") - world_size = torch.distributed.get_world_size() - global_rank = torch.distributed.get_rank() - local_rank = int(os.environ["LOCAL_RANK"]) - device = torch.device(local_rank) - - # Create the uid needed for cuGraph comms - if global_rank == 0: - cugraph_id = [cugraph_comms_create_unique_id()] - else: - cugraph_id = [None] - torch.distributed.broadcast_object_list(cugraph_id, src=0, device=device) - cugraph_id = cugraph_id[0] - - init_pytorch_worker(global_rank, local_rank, world_size, cugraph_id) - - # Split the data - edge_path = os.path.join(args.dataset_root, args.dataset + "_eix_part") - rel_path = os.path.join(args.dataset_root, args.dataset + "_rel_part") - pos_path = os.path.join(args.dataset_root, args.dataset + "_e_pos_part") - neg_path = os.path.join(args.dataset_root, args.dataset + "_e_neg_part") - meta_path = os.path.join(args.dataset_root, args.dataset + "_meta.json") - - if not args.skip_partition and global_rank == 0: - data = PygLinkPropPredDataset(args.dataset, root=args.dataset_root) - dataset = data[0] - - splits = data.get_edge_split() - - meta = {} - meta["num_nodes"] = int(dataset.num_nodes) 
- meta["num_rels"] = int(dataset.edge_reltype.max()) + 1 - - partition_data( - dataset, - splits, - meta, - edge_path=edge_path, - rel_path=rel_path, - pos_path=pos_path, - neg_path=neg_path, - meta_path=meta_path, - ) - del data - del dataset - del splits - torch.distributed.barrier() - - # Load partitions - data, edge_feature_store, splits, meta = load_partitioned_data( - rank=global_rank, - edge_path=edge_path, - rel_path=rel_path, - pos_path=pos_path, - neg_path=neg_path, - meta_path=meta_path, - ) - torch.distributed.barrier() - - model = RGCNEncoder( - meta["num_nodes"], - hidden_channels=args.hidden_channels, - num_relations=meta["num_rels"], - ) - - run_train( - global_rank, world_size, model, data, edge_feature_store, meta, splits, args - ) - else: - warnings.warn("This script should be run with 'torchrun`. Exiting.") diff --git a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_sg.py b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_sg.py deleted file mode 100644 index 67d7eecc7c2..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_sg.py +++ /dev/null @@ -1,219 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This example illustrates link classification using the ogbl-wikikg2 dataset. 
- -import argparse - -from typing import Tuple, Dict, Any - -import torch -import cupy - -import rmm -from rmm.allocators.cupy import rmm_cupy_allocator -from rmm.allocators.torch import rmm_torch_allocator - -# Must change allocators immediately upon import -# or else other imports will cause memory to be -# allocated and prevent changing the allocator -rmm.reinitialize(devices=[0], pool_allocator=True, managed_memory=True) -cupy.cuda.set_allocator(rmm_cupy_allocator) -torch.cuda.memory.change_current_allocator(rmm_torch_allocator) - -import torch.nn.functional as F # noqa: E402 -from torch.nn import Parameter # noqa: E402 -from torch_geometric.nn import FastRGCNConv, GAE # noqa: E402 -import torch_geometric # noqa: E402 -import cugraph_pyg # noqa: E402 - -# Enable cudf spilling to save gpu memory -from cugraph.testing.mg_utils import enable_spilling # noqa: E402 - -enable_spilling() - - -class RGCNEncoder(torch.nn.Module): - def __init__(self, num_nodes, hidden_channels, num_relations, num_bases=30): - super().__init__() - self.node_emb = Parameter(torch.empty(num_nodes, hidden_channels)) - self.conv1 = FastRGCNConv( - hidden_channels, hidden_channels, num_relations, num_bases=num_bases - ) - self.conv2 = FastRGCNConv( - hidden_channels, hidden_channels, num_relations, num_bases=num_bases - ) - self.reset_parameters() - - def reset_parameters(self): - torch.nn.init.xavier_uniform_(self.node_emb) - self.conv1.reset_parameters() - self.conv2.reset_parameters() - - def forward(self, edge_index, edge_type): - x = self.node_emb - x = self.conv1(x, edge_index, edge_type).relu_() - x = F.dropout(x, p=0.2, training=self.training) - x = self.conv2(x, edge_index, edge_type) - return x - - -def load_data( - dataset_str, dataset_root: str -) -> Tuple[ - Tuple["torch_geometric.data.FeatureStore", "torch_geometric.data.GraphStore"], - "torch_geometric.data.FeatureStore", - Dict[str, Dict[str, "torch.Tensor"]], - Dict[str, Any], -]: - from ogb.linkproppred import 
PygLinkPropPredDataset - - data = PygLinkPropPredDataset(dataset_str, root=dataset_root) - dataset = data[0] - - splits = data.get_edge_split() - - from cugraph_pyg.data import GraphStore, TensorDictFeatureStore - - graph_store = GraphStore() - feature_store = TensorDictFeatureStore() - edge_feature_store = TensorDictFeatureStore() - meta = {} - - graph_store[("n", "e", "n"), "coo"] = dataset.edge_index - edge_feature_store[("n", "e", "n"), "rel"] = dataset.edge_reltype.pin_memory() - meta["num_nodes"] = dataset.num_nodes - meta["num_rels"] = dataset.edge_reltype.max() + 1 - - return (feature_store, graph_store), edge_feature_store, splits, meta - - -def train(epoch, model, optimizer, train_loader, edge_feature_store): - model.train() - optimizer.zero_grad() - - for i, batch in enumerate(train_loader): - r = edge_feature_store[("n", "e", "n"), "rel"][batch.e_id].flatten().cuda() - z = model.encode(batch.edge_index, r) - - loss = model.recon_loss(z, batch.edge_index) - loss.backward() - optimizer.step() - - if i % 10 == 0: - print(f"Epoch: {epoch:02d}, Iteration: {i:02d}, Loss: {loss:.4f}") - if i == 100: - break - - -def test(stage, epoch, model, loader, num_steps=None): - # TODO support ROC-AUC metric - # Predict probabilities of future edges - model.eval() - - rr = 0.0 - for i, (h, h_neg, t, t_neg, r) in enumerate(loader): - if num_steps and i >= num_steps: - break - - ei = torch.concatenate( - [ - torch.stack([h, t]).cuda(), - torch.stack([h_neg.flatten(), t_neg.flatten()]).cuda(), - ], - dim=-1, - ) - - r = torch.concatenate([r, torch.repeat_interleave(r, h_neg.shape[-1])]).cuda() - - z = model.encode(ei, r) - q = model.decode(z, ei) - - _, ix = torch.sort(q, descending=True) - rr += 1.0 / (1.0 + ix[0]) - - print(f"epoch {epoch:02d} {stage} mrr:", rr / i) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--hidden_channels", type=int, default=128) - parser.add_argument("--num_layers", type=int, default=1) - 
parser.add_argument("--lr", type=float, default=0.001) - parser.add_argument("--epochs", type=int, default=4) - parser.add_argument("--batch_size", type=int, default=16384) - parser.add_argument("--num_neg", type=int, default=500) - parser.add_argument("--fan_out", type=int, default=10) - parser.add_argument("--dataset", type=str, default="ogbl-wikikg2") - parser.add_argument("--dataset_root", type=str, default="dataset") - parser.add_argument("--seeds_per_call", type=int, default=-1) - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - data, edge_feature_store, splits, meta = load_data(args.dataset, args.dataset_root) - - model = GAE( - RGCNEncoder( - meta["num_nodes"], - hidden_channels=args.hidden_channels, - num_relations=meta["num_rels"], - ) - ).cuda() - optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) - - train_loader = cugraph_pyg.loader.LinkNeighborLoader( - data, - [args.fan_out] * args.num_layers, - edge_label_index=torch.stack( - [splits["train"]["head"], splits["train"]["tail"]] - ), - local_seeds_per_call=args.seeds_per_call if args.seeds_per_call > 0 else None, - batch_size=args.batch_size, - shuffle=True, - drop_last=True, - ) - - def get_eval_loader(stage: str): - head = splits[stage]["head"] - tail = splits[stage]["tail"] - - head_neg = splits[stage]["head_neg"][:, : args.num_neg] - tail_neg = splits[stage]["tail_neg"][:, : args.num_neg] - - rel = splits[stage]["relation"] - - return torch.utils.data.DataLoader( - torch.utils.data.TensorDataset( - head.pin_memory(), - head_neg.pin_memory(), - tail.pin_memory(), - tail_neg.pin_memory(), - rel.pin_memory(), - ), - batch_size=1, - shuffle=False, - drop_last=True, - ) - - test_loader = get_eval_loader("test") - valid_loader = get_eval_loader("valid") - - for epoch in range(1, 1 + args.epochs): - train(epoch, model, optimizer, train_loader, edge_feature_store) - test("validation", epoch, model, valid_loader, num_steps=1024) - - test("test", epoch, model, 
test_loader, num_steps=1024) diff --git a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_snmg.py b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_snmg.py deleted file mode 100644 index 2c0ae53a08e..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_snmg.py +++ /dev/null @@ -1,320 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This example illustrates link classification using the ogbl-wikikg2 dataset. - -import os -import argparse -import warnings - -from typing import Tuple, Any - -import torch - -import torch.nn.functional as F -from torch.nn import Parameter -from torch_geometric.nn import FastRGCNConv, GAE -from torch.nn.parallel import DistributedDataParallel - -import torch_geometric -import cugraph_pyg - -from cugraph.gnn import ( - cugraph_comms_init, - cugraph_comms_create_unique_id, - cugraph_comms_shutdown, -) - -from pylibwholegraph.torch.initialize import ( - init as wm_init, - finalize as wm_finalize, -) - - -# Enable cudf spilling to save gpu memory -from cugraph.testing.mg_utils import enable_spilling - -# Ensures that a CUDA context is not created on import of rapids. 
-# Allows pytorch to create the context instead -os.environ["RAPIDS_NO_INITIALIZE"] = "1" - - -def init_pytorch_worker(rank, world_size, uid): - import rmm - - rmm.reinitialize(devices=[rank], pool_allocator=True, managed_memory=True) - - import cupy - from rmm.allocators.cupy import rmm_cupy_allocator - - cupy.cuda.set_allocator(rmm_cupy_allocator) - - cugraph_comms_init( - rank, - world_size, - uid, - rank, - ) - - wm_init(rank, world_size, rank, world_size) - - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "12355" - torch.distributed.init_process_group( - "nccl", - rank=rank, - world_size=world_size, - ) - - enable_spilling() - - -class RGCNEncoder(torch.nn.Module): - def __init__(self, num_nodes, hidden_channels, num_relations, num_bases=30): - super().__init__() - self.node_emb = Parameter(torch.empty(num_nodes, hidden_channels)) - self.conv1 = FastRGCNConv( - hidden_channels, hidden_channels, num_relations, num_bases=num_bases - ) - self.conv2 = FastRGCNConv( - hidden_channels, hidden_channels, num_relations, num_bases=num_bases - ) - self.reset_parameters() - - def reset_parameters(self): - torch.nn.init.xavier_uniform_(self.node_emb) - self.conv1.reset_parameters() - self.conv2.reset_parameters() - - def forward(self, edge_index, edge_type): - x = self.node_emb - x = self.conv1(x, edge_index, edge_type).relu_() - x = F.dropout(x, p=0.2, training=self.training) - x = self.conv2(x, edge_index, edge_type) - return x - - -def load_data( - rank: int, - world_size: int, - data: Any, -) -> Tuple[ - Tuple["torch_geometric.data.FeatureStore", "torch_geometric.data.GraphStore"], - "torch_geometric.data.FeatureStore", -]: - from cugraph_pyg.data import GraphStore, WholeFeatureStore, TensorDictFeatureStore - - graph_store = GraphStore() - feature_store = TensorDictFeatureStore() # empty fs required by PyG - edge_feature_store = WholeFeatureStore() - - graph_store[("n", "e", "n"), "coo"] = torch.tensor_split( - data.edge_index.cuda(), world_size, 
dim=1 - )[rank] - - edge_feature_store[("n", "e", "n"), "rel"] = torch.tensor_split( - data.edge_reltype.cuda(), - world_size, - )[rank] - - return (feature_store, graph_store), edge_feature_store - - -def train(epoch, model, optimizer, train_loader, edge_feature_store, num_steps=None): - model.train() - optimizer.zero_grad() - - for i, batch in enumerate(train_loader): - r = edge_feature_store[("n", "e", "n"), "rel"][batch.e_id].flatten().cuda() - z = model.encode(batch.edge_index, r) - - loss = model.recon_loss(z, batch.edge_index) - loss.backward() - optimizer.step() - - if i % 10 == 0: - print( - f"Epoch: {epoch:02d}, Iteration: {i:02d}, Loss: {loss:.4f}", flush=True - ) - if num_steps and i == num_steps: - break - - -def test(stage, epoch, model, loader, num_steps=None): - # TODO support ROC-AUC metric - # Predict probabilities of future edges - model.eval() - - rr = 0.0 - for i, (h, h_neg, t, t_neg, r) in enumerate(loader): - if num_steps and i >= num_steps: - break - - ei = torch.concatenate( - [ - torch.stack([h, t]).cuda(), - torch.stack([h_neg.flatten(), t_neg.flatten()]).cuda(), - ], - dim=-1, - ) - - r = torch.concatenate([r, torch.repeat_interleave(r, h_neg.shape[-1])]).cuda() - - z = model.encode(ei, r) - q = model.decode(z, ei) - - _, ix = torch.sort(q, descending=True) - rr += 1.0 / (1.0 + ix[0]) - - print(f"epoch {epoch:02d} {stage} mrr:", rr / i, flush=True) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--hidden_channels", type=int, default=128) - parser.add_argument("--num_layers", type=int, default=1) - parser.add_argument("--lr", type=float, default=0.001) - parser.add_argument("--epochs", type=int, default=4) - parser.add_argument("--batch_size", type=int, default=16384) - parser.add_argument("--num_neg", type=int, default=500) - parser.add_argument("--num_pos", type=int, default=-1) - parser.add_argument("--fan_out", type=int, default=10) - parser.add_argument("--dataset", type=str, default="ogbl-wikikg2") 
- parser.add_argument("--dataset_root", type=str, default="dataset") - parser.add_argument("--seeds_per_call", type=int, default=-1) - parser.add_argument("--n_devices", type=int, default=-1) - - return parser.parse_args() - - -def run_train(rank, world_size, uid, model, data, meta, splits, args): - init_pytorch_worker( - rank, - world_size, - uid, - ) - - model = model.to(rank) - model = GAE(DistributedDataParallel(model, device_ids=[rank])) - optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) - - data, edge_feature_store = load_data(rank, world_size, data) - - eli = torch.stack( - [ - torch.tensor_split(splits["train"]["head"], world_size)[rank], - torch.tensor_split(splits["train"]["tail"], world_size)[rank], - ] - ) - - train_loader = cugraph_pyg.loader.LinkNeighborLoader( - data, - [args.fan_out] * args.num_layers, - edge_label_index=eli, - local_seeds_per_call=args.seeds_per_call if args.seeds_per_call > 0 else None, - batch_size=args.batch_size, - shuffle=True, - drop_last=True, - ) - - def get_eval_loader(stage: str): - head = torch.tensor_split(splits[stage]["head"], world_size)[rank] - tail = torch.tensor_split(splits[stage]["tail"], world_size)[rank] - - head_neg = torch.tensor_split( - splits[stage]["head_neg"][:, : args.num_neg], world_size - )[rank] - tail_neg = torch.tensor_split( - splits[stage]["tail_neg"][:, : args.num_neg], world_size - )[rank] - - rel = torch.tensor_split(splits[stage]["relation"], world_size)[rank] - - return torch.utils.data.DataLoader( - torch.utils.data.TensorDataset( - head.pin_memory(), - head_neg.pin_memory(), - tail.pin_memory(), - tail_neg.pin_memory(), - rel.pin_memory(), - ), - batch_size=1, - shuffle=False, - drop_last=True, - ) - - test_loader = get_eval_loader("test") - valid_loader = get_eval_loader("valid") - - num_train_steps = (args.num_pos // args.batch_size) if args.num_pos > 0 else 100 - - for epoch in range(1, 1 + args.epochs): - train( - epoch, - model, - optimizer, - train_loader, - 
edge_feature_store, - num_steps=num_train_steps, - ) - test("validation", epoch, model, valid_loader, num_steps=1024) - - test("test", epoch, model, test_loader, num_steps=1024) - - wm_finalize() - cugraph_comms_shutdown() - - -if __name__ == "__main__": - if "CI_RUN" in os.environ and os.environ["CI_RUN"] == "1": - warnings.warn("Skipping SMNG example in CI due to memory limit") - else: - args = parse_args() - - # change the allocator before any allocations are made - from rmm.allocators.torch import rmm_torch_allocator - - torch.cuda.memory.change_current_allocator(rmm_torch_allocator) - - # import ogb here to stop it from creating a context and breaking pytorch/rmm - from ogb.linkproppred import PygLinkPropPredDataset - - data = PygLinkPropPredDataset(args.dataset, root=args.dataset_root) - dataset = data[0] - - splits = data.get_edge_split() - - meta = {} - meta["num_nodes"] = dataset.num_nodes - meta["num_rels"] = dataset.edge_reltype.max() + 1 - - model = RGCNEncoder( - meta["num_nodes"], - hidden_channels=args.hidden_channels, - num_relations=meta["num_rels"], - ) - - print("Data =", data) - if args.n_devices == -1: - world_size = torch.cuda.device_count() - else: - world_size = args.n_devices - print("Using", world_size, "GPUs...") - - uid = cugraph_comms_create_unique_id() - torch.multiprocessing.spawn( - run_train, - (world_size, uid, model, data, meta, splits, args), - nprocs=world_size, - join=True, - ) diff --git a/python/cugraph-pyg/cugraph_pyg/examples/start_dask.sh b/python/cugraph-pyg/cugraph_pyg/examples/start_dask.sh deleted file mode 100755 index 54c82f81298..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/examples/start_dask.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -WORKER_RMM_POOL_SIZE=14G \ -CUDA_VISIBLE_DEVICES=0,1 \ -SCHEDULER_FILE=$(pwd)/scheduler.json \ -../../../../mg_utils/run-dask-process.sh \ - scheduler workers \ - --tcp diff --git a/python/cugraph-pyg/cugraph_pyg/loader/__init__.py b/python/cugraph-pyg/cugraph_pyg/loader/__init__.py deleted file mode 100644 index c804b3d1f97..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/loader/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import warnings - -from cugraph_pyg.loader.node_loader import NodeLoader -from cugraph_pyg.loader.neighbor_loader import NeighborLoader - -from cugraph_pyg.loader.link_loader import LinkLoader -from cugraph_pyg.loader.link_neighbor_loader import LinkNeighborLoader - -from cugraph_pyg.loader.dask_node_loader import DaskNeighborLoader - -from cugraph_pyg.loader.dask_node_loader import BulkSampleLoader - - -def CuGraphNeighborLoader(*args, **kwargs): - warnings.warn( - "CuGraphNeighborLoader has been renamed to DaskNeighborLoader", FutureWarning - ) - return DaskNeighborLoader(*args, **kwargs) diff --git a/python/cugraph-pyg/cugraph_pyg/loader/dask_node_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/dask_node_loader.py deleted file mode 100644 index 9b24281b190..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/loader/dask_node_loader.py +++ /dev/null @@ -1,558 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import tempfile - -import os -import re -import warnings - -import cupy -import cudf - -from cugraph.gnn import BulkSampler -from cugraph.utilities.utils import import_optional, MissingModule - -from cugraph_pyg.data import DaskGraphStore -from cugraph_pyg.sampler.sampler_utils import ( - _sampler_output_from_sampling_results_heterogeneous, - _sampler_output_from_sampling_results_homogeneous_csr, - _sampler_output_from_sampling_results_homogeneous_coo, -) - -from typing import Union, Tuple, Sequence, List, Dict - -torch_geometric = import_optional("torch_geometric") -torch = import_optional("torch") -InputNodes = ( - Sequence - if isinstance(torch_geometric, MissingModule) - else torch_geometric.typing.InputNodes -) - - -class BulkSampleLoader: - """ - Iterator that executes sampling using Dask and cuGraph and - loads sampled minibatches from disk. - """ - - __ex_parquet_file = re.compile(r"batch=([0-9]+)\-([0-9]+)\.parquet") - - def __init__( - self, - feature_store: DaskGraphStore, - graph_store: DaskGraphStore, - input_nodes: InputNodes = None, - batch_size: int = 0, - *, - shuffle: bool = False, - drop_last: bool = True, - edge_types: Sequence[Tuple[str]] = None, - directory: Union[str, tempfile.TemporaryDirectory] = None, - input_files: List[str] = None, - starting_batch_id: int = 0, - batches_per_partition: int = 100, - # Sampler args - num_neighbors: Union[List[int], Dict[Tuple[str, str, str], List[int]]] = None, - replace: bool = True, - compression: str = "COO", - # Other kwargs for the BulkSampler - **kwargs, - ): - """ - Executes a bulk sampling job immediately upon creation. - Allows iteration over the returned results. - - Parameters - ---------- - feature_store: DaskGraphStore - The feature store containing features for the graph. - - graph_store: DaskGraphStore - The graph store containing the graph structure. - - input_nodes: InputNodes - The input nodes associated with this sampler. 
- If None, this loader will load batches - from disk rather than performing sampling in memory. - - batch_size: int - The number of input nodes per sampling batch. - Generally required unless loading already-sampled - data from disk. - - shuffle: bool (optional, default=False) - Whether to shuffle the input indices. - If True, will shuffle the input indices. - If False, will create batches in the original order. - - edge_types: Sequence[Tuple[str]] (optional, default=None) - The desired edge types for the subgraph. - Defaults to all edges in the graph. - - directory: str (optional, default=new tempdir) - The path of the directory to write samples to. - Defaults to a new generated temporary directory. - - input_files: List[str] (optional, default=None) - The input files to read from the directory containing - samples. This argument is only used when loading - alread-sampled batches from disk. - - starting_batch_id: int (optional, default=0) - The starting id for each batch. Defaults to 0. - - batches_per_partition: int (optional, default=100) - The number of batches in each output partition. - Defaults to 100. Gets passed to the bulk - sampler if there is one; otherwise, this argument - is used to determine which files to read. - - num_neighbors: Union[List[int], - Dict[Tuple[str, str, str], List[int]]] (required) - The number of neighbors to sample for each node in each iteration. - If an entry is set to -1, all neighbors will be included. - In heterogeneous graphs, may also take in a dictionary denoting - the number of neighbors to sample for each individual edge type. - - Note: in cuGraph, only one value of num_neighbors is currently supported. - Passing in a dictionary will result in an exception. 
- """ - - self.__feature_store = feature_store - self.__graph_store = graph_store - self.__next_batch = -1 - self.__end_exclusive = -1 - self.__batches_per_partition = batches_per_partition - self.__starting_batch_id = starting_batch_id - - self._total_read_time = 0.0 - self._total_convert_time = 0.0 - self._total_feature_time = 0.0 - - if input_nodes is None: - # Will be loading from disk - self.__num_batches = input_nodes - self.__directory = directory - if input_files is None: - if isinstance(self.__directory, str): - self.__input_files = iter(os.listdir(self.__directory)) - else: - self.__input_files = iter(os.listdir(self.__directory.name)) - else: - self.__input_files = iter(input_files) - return - - # To accommodate DLFW/PyG 2.5 - get_input_nodes = torch_geometric.loader.utils.get_input_nodes - get_input_nodes_kwargs = {} - if "input_id" in get_input_nodes.__annotations__: - get_input_nodes_kwargs["input_id"] = None - input_node_info = get_input_nodes( - (feature_store, graph_store), input_nodes, **get_input_nodes_kwargs - ) - - # PyG 2.4 - if len(input_node_info) == 2: - input_type, input_nodes = input_node_info - # PyG 2.5 - elif len(input_node_info) == 3: - input_type, input_nodes, input_id = input_node_info - # Invalid - else: - raise ValueError("Invalid output from get_input_nodes") - - if input_type is not None: - input_nodes = graph_store._get_sample_from_vertex_groups( - {input_type: input_nodes} - ) - - if batch_size is None or batch_size < 1: - raise ValueError("Batch size must be >= 1") - - self.__directory = ( - tempfile.TemporaryDirectory() if directory is None else directory - ) - - if isinstance(num_neighbors, dict): - raise ValueError("num_neighbors dict is currently unsupported!") - - if "renumber" in kwargs: - warnings.warn( - "Setting renumbering manually could result in invalid output," - " please ensure you intended to do this." 
- ) - renumber = kwargs.pop("renumber") - else: - renumber = ( - True - if ( - (len(self.__graph_store.node_types) == 1) - and (len(self.__graph_store.edge_types) == 1) - ) - else False - ) - - bulk_sampler = BulkSampler( - batch_size, - self.__directory - if isinstance(self.__directory, str) - else self.__directory.name, - self.__graph_store._subgraph(edge_types), - fanout_vals=num_neighbors, - with_replacement=replace, - batches_per_partition=self.__batches_per_partition, - renumber=renumber, - use_legacy_names=False, - deduplicate_sources=True, - prior_sources_behavior="exclude", - include_hop_column=(compression == "COO"), - **kwargs, - ) - - # Make sure indices are in cupy - input_nodes = cupy.asarray(input_nodes) - - # Shuffle - if shuffle: - cupy.random.shuffle(input_nodes) - - # Truncate if we can't evenly divide the input array - stop = (len(input_nodes) // batch_size) * batch_size - input_nodes, remainder = cupy.array_split(input_nodes, [stop]) - - # Split into batches - input_nodes = cupy.split(input_nodes, max(len(input_nodes) // batch_size, 1)) - - if not drop_last: - input_nodes.append(remainder) - - self.__num_batches = 0 - for batch_num, batch_i in enumerate(input_nodes): - batch_len = len(batch_i) - if batch_len > 0: - self.__num_batches += 1 - bulk_sampler.add_batches( - cudf.DataFrame( - { - "start": batch_i, - "batch": cupy.full( - batch_len, batch_num + starting_batch_id, dtype="int32" - ), - } - ), - start_col_name="start", - batch_col_name="batch", - ) - - bulk_sampler.flush() - self.__input_files = iter( - os.listdir( - self.__directory - if isinstance(self.__directory, str) - else self.__directory.name - ) - ) - - def __next__(self): - from time import perf_counter - - start_time_read_data = perf_counter() - - # Load the next set of sampling results if necessary - if self.__next_batch >= self.__end_exclusive: - if self.__directory is None: - raise StopIteration - - # Read the next parquet file into memory - dir_path = ( - self.__directory - 
if isinstance(self.__directory, str) - else self.__directory.name - ) - - # Will raise StopIteration if there are no files left - try: - fname = next(self.__input_files) - except StopIteration as ex: - # Won't delete a non-temp dir (since it would just be deleting a string) - del self.__directory - self.__directory = None - raise StopIteration(ex) - - m = self.__ex_parquet_file.match(fname) - if m is None: - raise ValueError(f"Invalid parquet filename {fname}") - - self.__start_inclusive, end_inclusive = [int(g) for g in m.groups()] - self.__next_batch = self.__start_inclusive - self.__end_exclusive = end_inclusive + 1 - - parquet_path = os.path.join( - dir_path, - fname, - ) - - raw_sample_data = cudf.read_parquet(parquet_path) - - if "map" in raw_sample_data.columns: - if "renumber_map_offsets" not in raw_sample_data.columns: - num_batches = end_inclusive - self.__start_inclusive + 1 - - map_end = raw_sample_data["map"].iloc[num_batches] - - map = torch.as_tensor( - raw_sample_data["map"].iloc[0:map_end], device="cuda" - ) - raw_sample_data.drop("map", axis=1, inplace=True) - - self.__renumber_map_offsets = map[0 : num_batches + 1] - map[0] - self.__renumber_map = map[num_batches + 1 :] - else: - self.__renumber_map = raw_sample_data["map"] - self.__renumber_map_offsets = raw_sample_data[ - "renumber_map_offsets" - ] - raw_sample_data.drop( - columns=["map", "renumber_map_offsets"], inplace=True - ) - - self.__renumber_map.dropna(inplace=True) - self.__renumber_map = torch.as_tensor( - self.__renumber_map, device="cuda" - ) - - self.__renumber_map_offsets.dropna(inplace=True) - self.__renumber_map_offsets = torch.as_tensor( - self.__renumber_map_offsets, device="cuda" - ) - - else: - self.__renumber_map = None - - self.__data = raw_sample_data - self.__coo = "majors" in self.__data.columns - if self.__coo: - self.__data.dropna(inplace=True) - - if ( - len(self.__graph_store.edge_types) == 1 - and len(self.__graph_store.node_types) == 1 - ): - if self.__coo: - 
group_cols = ["batch_id", "hop_id"] - self.__data_index = self.__data.groupby( - group_cols, as_index=True - ).agg({"majors": "max", "minors": "max"}) - self.__data_index.rename( - columns={"majors": "src_max", "minors": "dst_max"}, - inplace=True, - ) - self.__data_index = self.__data_index.to_dict(orient="index") - else: - self.__data_index = None - - self.__label_hop_offsets = self.__data["label_hop_offsets"] - self.__data.drop(columns=["label_hop_offsets"], inplace=True) - self.__label_hop_offsets.dropna(inplace=True) - self.__label_hop_offsets = torch.as_tensor( - self.__label_hop_offsets, device="cuda" - ) - self.__label_hop_offsets -= self.__label_hop_offsets[0].clone() - - self.__major_offsets = self.__data["major_offsets"] - self.__data.drop(columns="major_offsets", inplace=True) - self.__major_offsets.dropna(inplace=True) - self.__major_offsets = torch.as_tensor( - self.__major_offsets, device="cuda" - ) - self.__major_offsets -= self.__major_offsets[0].clone() - - self.__minors = self.__data["minors"] - self.__data.drop(columns="minors", inplace=True) - self.__minors.dropna(inplace=True) - self.__minors = torch.as_tensor(self.__minors, device="cuda") - - num_batches = self.__end_exclusive - self.__start_inclusive - offsets_len = len(self.__label_hop_offsets) - 1 - if offsets_len % num_batches != 0: - raise ValueError("invalid label-hop offsets") - self.__fanout_length = int(offsets_len / num_batches) - - end_time_read_data = perf_counter() - self._total_read_time += end_time_read_data - start_time_read_data - - # Pull the next set of sampling results out of the dataframe in memory - if self.__coo: - f = self.__data["batch_id"] == self.__next_batch - if self.__renumber_map is not None: - i = self.__next_batch - self.__start_inclusive - - # this should avoid d2h copy - current_renumber_map = self.__renumber_map[ - self.__renumber_map_offsets[i] : self.__renumber_map_offsets[i + 1] - ] - - else: - current_renumber_map = None - - start_time_convert = 
perf_counter() - # Get and return the sampled subgraph - if ( - len(self.__graph_store.edge_types) == 1 - and len(self.__graph_store.node_types) == 1 - ): - if self.__coo: - sampler_output = _sampler_output_from_sampling_results_homogeneous_coo( - self.__data[f], - current_renumber_map, - self.__graph_store, - self.__data_index, - self.__next_batch, - ) - else: - i = (self.__next_batch - self.__start_inclusive) * self.__fanout_length - current_label_hop_offsets = self.__label_hop_offsets[ - i : i + self.__fanout_length + 1 - ] - - current_major_offsets = self.__major_offsets[ - current_label_hop_offsets[0] : (current_label_hop_offsets[-1] + 1) - ] - - current_minors = self.__minors[ - current_major_offsets[0] : current_major_offsets[-1] - ] - - sampler_output = _sampler_output_from_sampling_results_homogeneous_csr( - current_major_offsets, - current_minors, - current_renumber_map, - self.__graph_store, - current_label_hop_offsets, - self.__data_index, - self.__next_batch, - ) - else: - sampler_output = _sampler_output_from_sampling_results_heterogeneous( - self.__data[f], current_renumber_map, self.__graph_store - ) - - # Get ready for next iteration - self.__next_batch += 1 - - end_time_convert = perf_counter() - self._total_convert_time += end_time_convert - start_time_convert - - start_time_feature = perf_counter() - # Create a PyG HeteroData object, loading the required features - if self.__graph_store != self.__feature_store: - # TODO Possibly support this if there is an actual use case - raise ValueError("Separate graph and feature stores currently unsupported") - - out = self.__graph_store.filter( - "COO" if self.__coo else "CSC", - sampler_output.node, - sampler_output.row, - sampler_output.col, - sampler_output.edge, - ) - - # Account for CSR format in cuGraph vs. 
CSC format in PyG - # TODO deprecate and remove this functionality - if self.__coo and self.__graph_store.order == "CSC": - for edge_type in out.edge_index_dict: - out[edge_type].edge_index = out[edge_type].edge_index.flip(dims=[0]) - - out.set_value_dict("num_sampled_nodes", sampler_output.num_sampled_nodes) - out.set_value_dict("num_sampled_edges", sampler_output.num_sampled_edges) - - end_time_feature = perf_counter() - self._total_feature_time = end_time_feature - start_time_feature - - return out - - @property - def _starting_batch_id(self): - return self.__starting_batch_id - - def __iter__(self): - return self - - -class DaskNeighborLoader: - """ - Duck-typed version of the PyG NeighborLoader interface that uses - Dask to sample nodes using the uniform neighbor sampling algorithm. - """ - - def __init__( - self, - data: Union[DaskGraphStore, Tuple[DaskGraphStore, DaskGraphStore]], - input_nodes: Union[InputNodes, int] = None, - batch_size: int = None, - **kwargs, - ): - """ - Constructs a new DaskNeighborLoader object. - - Parameters - ---------- - data: DaskGraphStore or (DaskGraphStore, DaskGraphStore) - The DaskGraphStore or stores where the graph/feature data is held. - - batch_size: int (required) - The number of input nodes in each batch. - - input_nodes: Union[InputNodes, int] (required) - The input nodes associated with this sampler. - - **kwargs: kwargs - Keyword arguments to pass through for sampling. - i.e. "shuffle", "fanout" - See BulkSampleLoader. - """ - - if input_nodes is None: - raise ValueError("input_nodes is required") - if batch_size is None: - raise ValueError("batch_size is required") - - # Allow passing in a feature store and graph store as a tuple, as - # in the standard PyG API. If only one is passed, it is assumed - # it is behaving as both a graph store and a feature store. 
- if isinstance(data, (list, tuple)): - self.__feature_store, self.__graph_store = data - else: - self.__feature_store = data - self.__graph_store = data - - self.__batch_size = batch_size - self.__input_nodes = input_nodes - self.inner_loader_args = kwargs - - @property - def batch_size(self) -> int: - return self.__batch_size - - def __iter__(self): - self.current_loader = BulkSampleLoader( - self.__feature_store, - self.__graph_store, - self.__input_nodes, - self.__batch_size, - **self.inner_loader_args, - ) - - return self.current_loader diff --git a/python/cugraph-pyg/cugraph_pyg/loader/link_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/link_loader.py deleted file mode 100644 index 77e2ac4f99d..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/loader/link_loader.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -import cugraph_pyg -from typing import Union, Tuple, Callable, Optional - -from cugraph.utilities.utils import import_optional - -torch_geometric = import_optional("torch_geometric") -torch = import_optional("torch") - - -class LinkLoader: - """ - Duck-typed version of torch_geometric.loader.LinkLoader. - Loads samples from batches of input nodes using a - `~cugraph_pyg.sampler.BaseSampler.sample_from_edges` - function. 
- """ - - def __init__( - self, - data: Union[ - "torch_geometric.data.Data", - "torch_geometric.data.HeteroData", - Tuple[ - "torch_geometric.data.FeatureStore", "torch_geometric.data.GraphStore" - ], - ], - link_sampler: "cugraph_pyg.sampler.BaseSampler", - edge_label_index: "torch_geometric.typing.InputEdges" = None, - edge_label: "torch_geometric.typing.OptTensor" = None, - edge_label_time: "torch_geometric.typing.OptTensor" = None, - neg_sampling: Optional["torch_geometric.sampler.NegativeSampling"] = None, - neg_sampling_ratio: Optional[Union[int, float]] = None, - transform: Optional[Callable] = None, - transform_sampler_output: Optional[Callable] = None, - filter_per_worker: Optional[bool] = None, - custom_cls: Optional["torch_geometric.data.HeteroData"] = None, - input_id: "torch_geometric.typing.OptTensor" = None, - batch_size: int = 1, # refers to number of edges in batch - shuffle: bool = False, - drop_last: bool = False, - **kwargs, - ): - """ - Parameters - ---------- - data: Data, HeteroData, or Tuple[FeatureStore, GraphStore] - See torch_geometric.loader.NodeLoader. - link_sampler: BaseSampler - See torch_geometric.loader.LinkLoader. - edge_label_index: InputEdges - See torch_geometric.loader.LinkLoader. - edge_label: OptTensor - See torch_geometric.loader.LinkLoader. - edge_label_time: OptTensor - See torch_geometric.loader.LinkLoader. - neg_sampling: Optional[NegativeSampling] - Type of negative sampling to perform, if desired. - See torch_geometric.loader.LinkLoader. - neg_sampling_ratio: Optional[Union[int, float]] - Negative sampling ratio. Affects how many negative - samples are generated. - See torch_geometric.loader.LinkLoader. - transform: Callable (optional, default=None) - This argument currently has no effect. - transform_sampler_output: Callable (optional, default=None) - This argument currently has no effect. - filter_per_worker: bool (optional, default=False) - This argument currently has no effect. 
- custom_cls: HeteroData - This argument currently has no effect. This loader will - always return a Data or HeteroData object. - input_id: OptTensor - See torch_geometric.loader.LinkLoader. - - """ - if not isinstance(data, (list, tuple)) or not isinstance( - data[1], cugraph_pyg.data.GraphStore - ): - # Will eventually automatically convert these objects to cuGraph objects. - raise NotImplementedError("Currently can't accept non-cugraph graphs") - - if not isinstance(link_sampler, cugraph_pyg.sampler.BaseSampler): - raise NotImplementedError("Must provide a cuGraph sampler") - - if edge_label_time is not None: - raise ValueError("Temporal sampling is currently unsupported") - - if filter_per_worker: - warnings.warn("filter_per_worker is currently ignored") - - if custom_cls is not None: - warnings.warn("custom_cls is currently ignored") - - if transform is not None: - warnings.warn("transform is currently ignored.") - - if transform_sampler_output is not None: - warnings.warn("transform_sampler_output is currently ignored.") - - if neg_sampling_ratio is not None: - warnings.warn( - "The 'neg_sampling_ratio' argument is deprecated in PyG" - " and is not supported in cuGraph-PyG." 
- ) - - neg_sampling = torch_geometric.sampler.NegativeSampling.cast(neg_sampling) - - ( - input_type, - edge_label_index, - ) = torch_geometric.loader.utils.get_edge_label_index( - data, - (None, edge_label_index), - ) - - self.__input_data = torch_geometric.sampler.EdgeSamplerInput( - input_id=torch.arange( - edge_label_index[0].numel(), dtype=torch.int64, device="cuda" - ) - if input_id is None - else input_id, - row=edge_label_index[0], - col=edge_label_index[1], - label=edge_label, - time=edge_label_time, - input_type=input_type, - ) - - # Edge label check from torch_geometric.loader.LinkLoader - if ( - neg_sampling is not None - and neg_sampling.is_binary() - and edge_label is not None - and edge_label.min() == 0 - ): - edge_label = edge_label + 1 - - if ( - neg_sampling is not None - and neg_sampling.is_triplet() - and edge_label is not None - ): - raise ValueError( - "'edge_label' needs to be undefined for " - "'triplet'-based negative sampling. Please use " - "`src_index`, `dst_pos_index` and " - "`neg_pos_index` of the returned mini-batch " - "instead to differentiate between positive and " - "negative samples." 
- ) - - self.__data = data - - self.__link_sampler = link_sampler - self.__neg_sampling = neg_sampling - - self.__batch_size = batch_size - self.__shuffle = shuffle - self.__drop_last = drop_last - - def __iter__(self): - if self.__shuffle: - perm = torch.randperm(self.__input_data.row.numel()) - else: - perm = torch.arange(self.__input_data.row.numel()) - - if self.__drop_last: - d = perm.numel() % self.__batch_size - perm = perm[:-d] - - input_data = torch_geometric.sampler.EdgeSamplerInput( - input_id=self.__input_data.input_id[perm], - row=self.__input_data.row[perm], - col=self.__input_data.col[perm], - label=None - if self.__input_data.label is None - else self.__input_data.label[perm], - time=None - if self.__input_data.time is None - else self.__input_data.time[perm], - input_type=self.__input_data.input_type, - ) - - return cugraph_pyg.sampler.SampleIterator( - self.__data, - self.__link_sampler.sample_from_edges( - input_data, - neg_sampling=self.__neg_sampling, - ), - ) diff --git a/python/cugraph-pyg/cugraph_pyg/loader/link_neighbor_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/link_neighbor_loader.py deleted file mode 100644 index 080565368c4..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/loader/link_neighbor_loader.py +++ /dev/null @@ -1,243 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import warnings - -from typing import Union, Tuple, Optional, Callable, List, Dict - -import cugraph_pyg -from cugraph_pyg.loader import LinkLoader -from cugraph_pyg.sampler import BaseSampler - -from cugraph.gnn import NeighborSampler, DistSampleWriter -from cugraph.utilities.utils import import_optional - -torch_geometric = import_optional("torch_geometric") - - -class LinkNeighborLoader(LinkLoader): - """ - Duck-typed version of torch_geometric.loader.LinkNeighborLoader - - Link loader that implements the neighbor sampling - algorithm used in GraphSAGE. - """ - - def __init__( - self, - data: Union[ - "torch_geometric.data.Data", - "torch_geometric.data.HeteroData", - Tuple[ - "torch_geometric.data.FeatureStore", "torch_geometric.data.GraphStore" - ], - ], - num_neighbors: Union[ - List[int], Dict["torch_geometric.typing.EdgeType", List[int]] - ], - edge_label_index: "torch_geometric.typing.InputEdges" = None, - edge_label: "torch_geometric.typing.OptTensor" = None, - edge_label_time: "torch_geometric.typing.OptTensor" = None, - replace: bool = False, - subgraph_type: Union[ - "torch_geometric.typing.SubgraphType", str - ] = "directional", - disjoint: bool = False, - temporal_strategy: str = "uniform", - neg_sampling: Optional["torch_geometric.sampler.NegativeSampling"] = None, - neg_sampling_ratio: Optional[Union[int, float]] = None, - time_attr: Optional[str] = None, - weight_attr: Optional[str] = None, - transform: Optional[Callable] = None, - transform_sampler_output: Optional[Callable] = None, - is_sorted: bool = False, - filter_per_worker: Optional[bool] = None, - neighbor_sampler: Optional["torch_geometric.sampler.NeighborSampler"] = None, - directed: bool = True, # Deprecated. - batch_size: int = 16, # Refers to number of edges per batch. 
- directory: Optional[str] = None, - batches_per_partition=256, - format: str = "parquet", - compression: Optional[str] = None, - local_seeds_per_call: Optional[int] = None, - **kwargs, - ): - """ - data: Data, HeteroData, or Tuple[FeatureStore, GraphStore] - See torch_geometric.loader.LinkNeighborLoader. - num_neighbors: List[int] or Dict[EdgeType, List[int]] - Fanout values. - See torch_geometric.loader.LinkNeighborLoader. - edge_label_index: InputEdges - Input edges for sampling. - See torch_geometric.loader.LinkNeighborLoader. - edge_label: OptTensor - Labels for input edges. - See torch_geometric.loader.LinkNeighborLoader. - edge_label_time: OptTensor - Time attribute for input edges. - See torch_geometric.loader.LinkNeighborLoader. - replace: bool (optional, default=False) - Whether to sample with replacement. - See torch_geometric.loader.LinkNeighborLoader. - subgraph_type: Union[SubgraphType, str] (optional, default='directional') - The type of subgraph to return. - Currently only 'directional' is supported. - See torch_geometric.loader.LinkNeighborLoader. - disjoint: bool (optional, default=False) - Whether to perform disjoint sampling. - Currently unsupported. - See torch_geometric.loader.LinkNeighborLoader. - temporal_strategy: str (optional, default='uniform') - Currently only 'uniform' is suppported. - See torch_geometric.loader.LinkNeighborLoader. - time_attr: str (optional, default=None) - Used for temporal sampling. - See torch_geometric.loader.LinkNeighborLoader. - weight_attr: str (optional, default=None) - Used for biased sampling. - See torch_geometric.loader.LinkNeighborLoader. - transform: Callable (optional, default=None) - See torch_geometric.loader.LinkNeighborLoader. - transform_sampler_output: Callable (optional, default=None) - See torch_geometric.loader.LinkNeighborLoader. - is_sorted: bool (optional, default=False) - Ignored by cuGraph. - See torch_geometric.loader.LinkNeighborLoader. 
- filter_per_worker: bool (optional, default=False) - Currently ignored by cuGraph, but this may - change once in-memory sampling is implemented. - See torch_geometric.loader.LinkNeighborLoader. - neighbor_sampler: torch_geometric.sampler.NeighborSampler - (optional, default=None) - Not supported by cuGraph. - See torch_geometric.loader.LinkNeighborLoader. - directed: bool (optional, default=True) - Deprecated. - See torch_geometric.loader.LinkNeighborLoader. - batch_size: int (optional, default=16) - The number of input nodes per output minibatch. - See torch.utils.dataloader. - directory: str (optional, default=None) - The directory where samples will be temporarily stored, - if spilling samples to disk. If None, this loader - will perform buffered in-memory sampling. - If writing to disk, setting this argument - to a tempfile.TemporaryDirectory with a context - manager is a good option but depending on the filesystem, - you may want to choose an alternative location with fast I/O - intead. - See cugraph.gnn.DistSampleWriter. - batches_per_partition: int (optional, default=256) - The number of batches per partition if writing samples to - disk. Manually tuning this parameter is not recommended - but reducing it may help conserve GPU memory. - See cugraph.gnn.DistSampleWriter. - format: str (optional, default='parquet') - If writing samples to disk, they will be written in this - file format. - See cugraph.gnn.DistSampleWriter. - compression: str (optional, default=None) - The compression type to use if writing samples to disk. - If not provided, it is automatically chosen. - local_seeds_per_call: int (optional, default=None) - The number of seeds to process within a single sampling call. - Manually tuning this parameter is not recommended but reducing - it may conserve GPU memory. The total number of seeds processed - per sampling call is equal to the sum of this parameter across - all workers. If not provided, it will be automatically - calculated. 
- See cugraph.gnn.DistSampler. - **kwargs - Other keyword arguments passed to the superclass. - """ - - subgraph_type = torch_geometric.sampler.base.SubgraphType(subgraph_type) - - if not directed: - subgraph_type = torch_geometric.sampler.base.SubgraphType.induced - warnings.warn( - "The 'directed' argument is deprecated. " - "Use subgraph_type='induced' instead." - ) - if subgraph_type != torch_geometric.sampler.base.SubgraphType.directional: - raise ValueError("Only directional subgraphs are currently supported") - if disjoint: - raise ValueError("Disjoint sampling is currently unsupported") - if temporal_strategy != "uniform": - warnings.warn("Only the uniform temporal strategy is currently supported") - if neighbor_sampler is not None: - raise ValueError("Passing a neighbor sampler is currently unsupported") - if time_attr is not None: - raise ValueError("Temporal sampling is currently unsupported") - if is_sorted: - warnings.warn("The 'is_sorted' argument is ignored by cuGraph.") - if not isinstance(data, (list, tuple)) or not isinstance( - data[1], cugraph_pyg.data.GraphStore - ): - # Will eventually automatically convert these objects to cuGraph objects. 
- raise NotImplementedError("Currently can't accept non-cugraph graphs") - - if compression is None: - compression = "CSR" - elif compression not in ["CSR", "COO"]: - raise ValueError("Invalid value for compression (expected 'CSR' or 'COO')") - - writer = ( - None - if directory is None - else DistSampleWriter( - directory=directory, - batches_per_partition=batches_per_partition, - format=format, - ) - ) - - feature_store, graph_store = data - - if weight_attr is not None: - graph_store._set_weight_attr((feature_store, weight_attr)) - - sampler = BaseSampler( - NeighborSampler( - graph_store._graph, - writer, - retain_original_seeds=True, - fanout=num_neighbors, - prior_sources_behavior="exclude", - deduplicate_sources=True, - compression=compression, - compress_per_hop=False, - with_replacement=replace, - local_seeds_per_call=local_seeds_per_call, - biased=(weight_attr is not None), - ), - (feature_store, graph_store), - batch_size=batch_size, - ) - # TODO add heterogeneous support and pass graph_store._vertex_offsets - - super().__init__( - (feature_store, graph_store), - sampler, - edge_label_index=edge_label_index, - edge_label=edge_label, - edge_label_time=edge_label_time, - neg_sampling=neg_sampling, - neg_sampling_ratio=neg_sampling_ratio, - transform=transform, - transform_sampler_output=transform_sampler_output, - filter_per_worker=filter_per_worker, - batch_size=batch_size, - **kwargs, - ) diff --git a/python/cugraph-pyg/cugraph_pyg/loader/neighbor_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/neighbor_loader.py deleted file mode 100644 index 1da2c6dc381..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/loader/neighbor_loader.py +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -from typing import Union, Tuple, Optional, Callable, List, Dict - -import cugraph_pyg -from cugraph_pyg.loader import NodeLoader -from cugraph_pyg.sampler import BaseSampler - -from cugraph.gnn import NeighborSampler, DistSampleWriter -from cugraph.utilities.utils import import_optional - -torch_geometric = import_optional("torch_geometric") - - -class NeighborLoader(NodeLoader): - """ - Duck-typed version of torch_geometric.loader.NeighborLoader - - Node loader that implements the neighbor sampling - algorithm used in GraphSAGE. - """ - - def __init__( - self, - data: Union[ - "torch_geometric.data.Data", - "torch_geometric.data.HeteroData", - Tuple[ - "torch_geometric.data.FeatureStore", "torch_geometric.data.GraphStore" - ], - ], - num_neighbors: Union[ - List[int], Dict["torch_geometric.typing.EdgeType", List[int]] - ], - input_nodes: "torch_geometric.typing.InputNodes" = None, - input_time: "torch_geometric.typing.OptTensor" = None, - replace: bool = False, - subgraph_type: Union[ - "torch_geometric.typing.SubgraphType", str - ] = "directional", - disjoint: bool = False, - temporal_strategy: str = "uniform", - time_attr: Optional[str] = None, - weight_attr: Optional[str] = None, - transform: Optional[Callable] = None, - transform_sampler_output: Optional[Callable] = None, - is_sorted: bool = False, - filter_per_worker: Optional[bool] = None, - neighbor_sampler: Optional["torch_geometric.sampler.NeighborSampler"] = None, - directed: bool = True, # Deprecated. 
- batch_size: int = 16, - directory: Optional[str] = None, - batches_per_partition=256, - format: str = "parquet", - compression: Optional[str] = None, - local_seeds_per_call: Optional[int] = None, - **kwargs, - ): - """ - data: Data, HeteroData, or Tuple[FeatureStore, GraphStore] - See torch_geometric.loader.NeighborLoader. - num_neighbors: List[int] or Dict[EdgeType, List[int]] - Fanout values. - See torch_geometric.loader.NeighborLoader. - input_nodes: InputNodes - Input nodes for sampling. - See torch_geometric.loader.NeighborLoader. - input_time: OptTensor (optional) - See torch_geometric.loader.NeighborLoader. - replace: bool (optional, default=False) - Whether to sample with replacement. - See torch_geometric.loader.NeighborLoader. - subgraph_type: Union[SubgraphType, str] (optional, default='directional') - The type of subgraph to return. - Currently only 'directional' is supported. - See torch_geometric.loader.NeighborLoader. - disjoint: bool (optional, default=False) - Whether to perform disjoint sampling. - Currently unsupported. - See torch_geometric.loader.NeighborLoader. - temporal_strategy: str (optional, default='uniform') - Currently only 'uniform' is suppported. - See torch_geometric.loader.NeighborLoader. - time_attr: str (optional, default=None) - Used for temporal sampling. - See torch_geometric.loader.NeighborLoader. - weight_attr: str (optional, default=None) - Used for biased sampling. - See torch_geometric.loader.NeighborLoader. - transform: Callable (optional, default=None) - See torch_geometric.loader.NeighborLoader. - transform_sampler_output: Callable (optional, default=None) - See torch_geometric.loader.NeighborLoader. - is_sorted: bool (optional, default=False) - Ignored by cuGraph. - See torch_geometric.loader.NeighborLoader. - filter_per_worker: bool (optional, default=False) - Currently ignored by cuGraph, but this may - change once in-memory sampling is implemented. - See torch_geometric.loader.NeighborLoader. 
- neighbor_sampler: torch_geometric.sampler.NeighborSampler - (optional, default=None) - Not supported by cuGraph. - See torch_geometric.loader.NeighborLoader. - directed: bool (optional, default=True) - Deprecated. - See torch_geometric.loader.NeighborLoader. - batch_size: int (optional, default=16) - The number of input nodes per output minibatch. - See torch.utils.dataloader. - directory: str (optional, default=None) - The directory where samples will be temporarily stored, - if spilling samples to disk. If None, this loader - will perform buffered in-memory sampling. - If writing to disk, setting this argument - to a tempfile.TemporaryDirectory with a context - manager is a good option but depending on the filesystem, - you may want to choose an alternative location with fast I/O - intead. - See cugraph.gnn.DistSampleWriter. - batches_per_partition: int (optional, default=256) - The number of batches per partition if writing samples to - disk. Manually tuning this parameter is not recommended - but reducing it may help conserve GPU memory. - See cugraph.gnn.DistSampleWriter. - format: str (optional, default='parquet') - If writing samples to disk, they will be written in this - file format. - See cugraph.gnn.DistSampleWriter. - compression: str (optional, default=None) - The compression type to use if writing samples to disk. - If not provided, it is automatically chosen. - local_seeds_per_call: int (optional, default=None) - The number of seeds to process within a single sampling call. - Manually tuning this parameter is not recommended but reducing - it may conserve GPU memory. The total number of seeds processed - per sampling call is equal to the sum of this parameter across - all workers. If not provided, it will be automatically - calculated. - See cugraph.gnn.DistSampler. - **kwargs - Other keyword arguments passed to the superclass. 
- """ - - subgraph_type = torch_geometric.sampler.base.SubgraphType(subgraph_type) - - if not directed: - subgraph_type = torch_geometric.sampler.base.SubgraphType.induced - warnings.warn( - "The 'directed' argument is deprecated. " - "Use subgraph_type='induced' instead." - ) - if subgraph_type != torch_geometric.sampler.base.SubgraphType.directional: - raise ValueError("Only directional subgraphs are currently supported") - if disjoint: - raise ValueError("Disjoint sampling is currently unsupported") - if temporal_strategy != "uniform": - warnings.warn("Only the uniform temporal strategy is currently supported") - if neighbor_sampler is not None: - raise ValueError("Passing a neighbor sampler is currently unsupported") - if time_attr is not None: - raise ValueError("Temporal sampling is currently unsupported") - if is_sorted: - warnings.warn("The 'is_sorted' argument is ignored by cuGraph.") - if not isinstance(data, (list, tuple)) or not isinstance( - data[1], cugraph_pyg.data.GraphStore - ): - # Will eventually automatically convert these objects to cuGraph objects. 
- raise NotImplementedError("Currently can't accept non-cugraph graphs") - - if compression is None: - compression = "CSR" - elif compression not in ["CSR", "COO"]: - raise ValueError("Invalid value for compression (expected 'CSR' or 'COO')") - - writer = ( - None - if directory is None - else DistSampleWriter( - directory=directory, - batches_per_partition=batches_per_partition, - format=format, - ) - ) - - feature_store, graph_store = data - - if weight_attr is not None: - graph_store._set_weight_attr((feature_store, weight_attr)) - - sampler = BaseSampler( - NeighborSampler( - graph_store._graph, - writer, - retain_original_seeds=True, - fanout=num_neighbors, - prior_sources_behavior="exclude", - deduplicate_sources=True, - compression=compression, - compress_per_hop=False, - with_replacement=replace, - local_seeds_per_call=local_seeds_per_call, - biased=(weight_attr is not None), - ), - (feature_store, graph_store), - batch_size=batch_size, - ) - # TODO add heterogeneous support and pass graph_store._vertex_offsets - - super().__init__( - (feature_store, graph_store), - sampler, - input_nodes=input_nodes, - input_time=input_time, - transform=transform, - transform_sampler_output=transform_sampler_output, - filter_per_worker=filter_per_worker, - batch_size=batch_size, - **kwargs, - ) diff --git a/python/cugraph-pyg/cugraph_pyg/loader/node_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/node_loader.py deleted file mode 100644 index 4b236f75885..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/loader/node_loader.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -import cugraph_pyg -from typing import Union, Tuple, Callable, Optional - -from cugraph.utilities.utils import import_optional - -torch_geometric = import_optional("torch_geometric") -torch = import_optional("torch") - - -class NodeLoader: - """ - Duck-typed version of torch_geometric.loader.NodeLoader. - Loads samples from batches of input nodes using a - `~cugraph_pyg.sampler.BaseSampler.sample_from_nodes` - function. - """ - - def __init__( - self, - data: Union[ - "torch_geometric.data.Data", - "torch_geometric.data.HeteroData", - Tuple[ - "torch_geometric.data.FeatureStore", "torch_geometric.data.GraphStore" - ], - ], - node_sampler: "cugraph_pyg.sampler.BaseSampler", - input_nodes: "torch_geometric.typing.InputNodes" = None, - input_time: "torch_geometric.typing.OptTensor" = None, - transform: Optional[Callable] = None, - transform_sampler_output: Optional[Callable] = None, - filter_per_worker: Optional[bool] = None, - custom_cls: Optional["torch_geometric.data.HeteroData"] = None, - input_id: "torch_geometric.typing.OptTensor" = None, - batch_size: int = 1, - shuffle: bool = False, - drop_last: bool = False, - **kwargs, - ): - """ - Parameters - ---------- - data: Data, HeteroData, or Tuple[FeatureStore, GraphStore] - See torch_geometric.loader.NodeLoader. - node_sampler: BaseSampler - See torch_geometric.loader.NodeLoader. - input_nodes: InputNodes - See torch_geometric.loader.NodeLoader. - input_time: OptTensor - See torch_geometric.loader.NodeLoader. 
- transform: Callable (optional, default=None) - This argument currently has no effect. - transform_sampler_output: Callable (optional, default=None) - This argument currently has no effect. - filter_per_worker: bool (optional, default=False) - This argument currently has no effect. - custom_cls: HeteroData - This argument currently has no effect. This loader will - always return a Data or HeteroData object. - input_id: OptTensor - See torch_geometric.loader.NodeLoader. - - """ - if not isinstance(data, (list, tuple)) or not isinstance( - data[1], cugraph_pyg.data.GraphStore - ): - # Will eventually automatically convert these objects to cuGraph objects. - raise NotImplementedError("Currently can't accept non-cugraph graphs") - - if not isinstance(node_sampler, cugraph_pyg.sampler.BaseSampler): - raise NotImplementedError("Must provide a cuGraph sampler") - - if input_time is not None: - raise ValueError("Temporal sampling is currently unsupported") - - if filter_per_worker: - warnings.warn("filter_per_worker is currently ignored") - - if custom_cls is not None: - warnings.warn("custom_cls is currently ignored") - - if transform is not None: - warnings.warn("transform is currently ignored.") - - if transform_sampler_output is not None: - warnings.warn("transform_sampler_output is currently ignored.") - - ( - input_type, - input_nodes, - input_id, - ) = torch_geometric.loader.utils.get_input_nodes( - data, - input_nodes, - input_id, - ) - - self.__input_data = torch_geometric.sampler.NodeSamplerInput( - input_id=torch.arange(len(input_nodes), dtype=torch.int64, device="cuda") - if input_id is None - else input_id, - node=input_nodes, - time=None, - input_type=input_type, - ) - - self.__data = data - - self.__node_sampler = node_sampler - - self.__batch_size = batch_size - self.__shuffle = shuffle - self.__drop_last = drop_last - - def __iter__(self): - if self.__shuffle: - perm = torch.randperm(self.__input_data.node.numel()) - else: - perm = 
torch.arange(self.__input_data.node.numel()) - - if self.__drop_last: - d = perm.numel() % self.__batch_size - perm = perm[:-d] - - input_data = torch_geometric.sampler.NodeSamplerInput( - input_id=self.__input_data.input_id[perm], - node=self.__input_data.node[perm], - time=None - if self.__input_data.time is None - else self.__input_data.time[perm], - input_type=self.__input_data.input_type, - ) - - return cugraph_pyg.sampler.SampleIterator( - self.__data, self.__node_sampler.sample_from_nodes(input_data) - ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/__init__.py b/python/cugraph-pyg/cugraph_pyg/nn/__init__.py deleted file mode 100644 index 331b49ebec0..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .conv import * diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py deleted file mode 100644 index bef3a023b93..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .gat_conv import GATConv -from .gatv2_conv import GATv2Conv -from .hetero_gat_conv import HeteroGATConv -from .rgcn_conv import RGCNConv -from .sage_conv import SAGEConv -from .transformer_conv import TransformerConv - -__all__ = [ - "GATConv", - "GATv2Conv", - "HeteroGATConv", - "RGCNConv", - "SAGEConv", - "TransformerConv", -] diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py deleted file mode 100644 index 713448a8203..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import warnings -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -import pylibcugraphops.pytorch - - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - -# A tuple of (row, colptr, num_src_nodes) -CSC = Tuple[torch.Tensor, torch.Tensor, int] - - -class BaseConv(torch.nn.Module): # pragma: no cover - r"""An abstract base class for implementing cugraph-ops message passing layers.""" - - def reset_parameters(self): - r"""Resets all learnable parameters of the module.""" - pass - - @staticmethod - def to_csc( - edge_index: torch.Tensor, - size: Optional[Tuple[int, int]] = None, - edge_attr: Optional[torch.Tensor] = None, - ) -> Union[CSC, Tuple[CSC, torch.Tensor],]: - r"""Returns a CSC representation of an :obj:`edge_index` tensor to be - used as input to cugraph-ops conv layers. - - Args: - edge_index (torch.Tensor): The edge indices. - size ((int, int), optional). The shape of :obj:`edge_index` in each - dimension. (default: :obj:`None`) - edge_attr (torch.Tensor, optional): The edge features. - (default: :obj:`None`) - """ - if size is None: - warnings.warn( - f"Inferring the graph size from 'edge_index' causes " - f"a decline in performance and does not work for " - f"bipartite graphs. To suppress this warning, pass " - f"the 'size' explicitly in '{__name__}.to_csc()'." 
- ) - num_src_nodes = num_dst_nodes = int(edge_index.max()) + 1 - else: - num_src_nodes, num_dst_nodes = size - - row, col = edge_index - col, perm = torch_geometric.utils.index_sort(col, max_value=num_dst_nodes) - row = row[perm] - - colptr = torch_geometric.utils.sparse.index2ptr(col, num_dst_nodes) - - if edge_attr is not None: - return (row, colptr, num_src_nodes), edge_attr[perm] - - return row, colptr, num_src_nodes - - def get_cugraph( - self, - edge_index: Union[torch_geometric.EdgeIndex, CSC], - bipartite: bool = False, - max_num_neighbors: Optional[int] = None, - ) -> Tuple[pylibcugraphops.pytorch.CSC, Optional[torch.Tensor]]: - r"""Constructs a :obj:`cugraph-ops` graph object from CSC representation. - Supports both bipartite and non-bipartite graphs. - - Args: - edge_index (EdgeIndex, (torch.Tensor, torch.Tensor, int)): The edge - indices, or a tuple of :obj:`(row, colptr, num_src_nodes)` for - CSC representation. - bipartite (bool): If set to :obj:`True`, will create the bipartite - structure in cugraph-ops. (default: :obj:`False`) - max_num_neighbors (int, optional): The maximum number of neighbors - of a destination node. When enabled, it allows models to use - the message-flow-graph primitives in cugraph-ops. - (default: :obj:`None`) - """ - perm = None - if isinstance(edge_index, torch_geometric.EdgeIndex): - edge_index, perm = edge_index.sort_by("col") - num_src_nodes = edge_index.get_sparse_size(0) - (colptr, row), _ = edge_index.get_csc() - else: - row, colptr, num_src_nodes = edge_index - - if not row.is_cuda: - raise RuntimeError( - f"'{self.__class__.__name__}' requires GPU-based processing " - f"but got CPU tensor." 
- ) - - if max_num_neighbors is None: - max_num_neighbors = -1 - - return ( - pylibcugraphops.pytorch.CSC( - offsets=colptr, - indices=row, - num_src_nodes=num_src_nodes, - dst_max_in_degree=max_num_neighbors, - is_bipartite=bipartite, - ), - perm, - ) - - def get_typed_cugraph( - self, - edge_index: Union[torch_geometric.EdgeIndex, CSC], - edge_type: torch.Tensor, - num_edge_types: Optional[int] = None, - bipartite: bool = False, - max_num_neighbors: Optional[int] = None, - ) -> Tuple[pylibcugraphops.pytorch.HeteroCSC, Optional[torch.Tensor]]: - r"""Constructs a typed :obj:`cugraph` graph object from a CSC - representation where each edge corresponds to a given edge type. - Supports both bipartite and non-bipartite graphs. - - Args: - edge_index (EdgeIndex, (torch.Tensor, torch.Tensor, int)): The edge - indices, or a tuple of :obj:`(row, colptr, num_src_nodes)` for - CSC representation. - edge_type (torch.Tensor): The edge type. - num_edge_types (int, optional): The maximum number of edge types. - When not given, will be computed on-the-fly, leading to - slightly worse performance. (default: :obj:`None`) - bipartite (bool): If set to :obj:`True`, will create the bipartite - structure in cugraph-ops. (default: :obj:`False`) - max_num_neighbors (int, optional): The maximum number of neighbors - of a destination node. When enabled, it allows models to use - the message-flow-graph primitives in cugraph-ops. 
- (default: :obj:`None`) - """ - if num_edge_types is None: - num_edge_types = int(edge_type.max()) + 1 - - if max_num_neighbors is None: - max_num_neighbors = -1 - - perm = None - if isinstance(edge_index, torch_geometric.EdgeIndex): - edge_index, perm = edge_index.sort_by("col") - edge_type = edge_type[perm] - num_src_nodes = edge_index.get_sparse_size(0) - (colptr, row), _ = edge_index.get_csc() - else: - row, colptr, num_src_nodes = edge_index - edge_type = edge_type.int() - - return ( - pylibcugraphops.pytorch.HeteroCSC( - offsets=colptr, - indices=row, - edge_types=edge_type, - num_src_nodes=num_src_nodes, - num_edge_types=num_edge_types, - dst_max_in_degree=max_num_neighbors, - is_bipartite=bipartite, - ), - perm, - ) - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - edge_index: Union[torch_geometric.EdgeIndex, CSC], - ) -> torch.Tensor: - r"""Runs the forward pass of the module. - - Args: - x (torch.Tensor): The node features. - edge_index (EdgeIndex, (torch.Tensor, torch.Tensor, int)): The edge - indices, or a tuple of :obj:`(row, colptr, num_src_nodes)` for - CSC representation. - """ - raise NotImplementedError diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py deleted file mode 100644 index 981b1c5b50d..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py +++ /dev/null @@ -1,259 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import mha_gat_n2n - -from .base import BaseConv, CSC - -torch = import_optional("torch") -nn = import_optional("torch.nn") -torch_geometric = import_optional("torch_geometric") - - -class GATConv(BaseConv): - r"""The graph attentional operator from the `"Graph Attention Networks" - `_ paper. - - .. math:: - \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} + - \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j}, - - where the attention coefficients :math:`\alpha_{i,j}` are computed as - - .. math:: - \alpha_{i,j} = - \frac{ - \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} - [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_j] - \right)\right)} - {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} - \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} - [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_k] - \right)\right)}. - - If the graph has multi-dimensional edge features :math:`\mathbf{e}_{i,j}`, - the attention coefficients :math:`\alpha_{i,j}` are computed as - - .. math:: - \alpha_{i,j} = - \frac{ - \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} - [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_j - \, \Vert \, \mathbf{\Theta}_{e} \mathbf{e}_{i,j}]\right)\right)} - {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} - \exp\left(\mathrm{LeakyReLU}\left(\mathbf{a}^{\top} - [\mathbf{\Theta}\mathbf{x}_i \, \Vert \, \mathbf{\Theta}\mathbf{x}_k - \, \Vert \, \mathbf{\Theta}_{e} \mathbf{e}_{i,k}]\right)\right)}. - - Args: - in_channels (int or tuple): Size of each input sample, or :obj:`-1` to - derive the size from the first input(s) to the forward method. - A tuple corresponds to the sizes of source and target - dimensionalities. 
- out_channels (int): Size of each output sample. - heads (int, optional): Number of multi-head-attentions. - (default: :obj:`1`) - concat (bool, optional): If set to :obj:`False`, the multi-head - attentions are averaged instead of concatenated. - (default: :obj:`True`) - negative_slope (float, optional): LeakyReLU angle of the negative - slope. (default: :obj:`0.2`) - edge_dim (int, optional): Edge feature dimensionality (in case - there are any). (default: :obj:`None`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - heads: int = 1, - concat: bool = True, - negative_slope: float = 0.2, - edge_dim: Optional[int] = None, - bias: bool = True, - ): - super().__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.heads = heads - self.concat = concat - self.negative_slope = negative_slope - self.edge_dim = edge_dim - - Linear = torch_geometric.nn.Linear - - if isinstance(in_channels, int): - self.lin = Linear( - in_channels, - heads * out_channels, - bias=False, - weight_initializer="glorot", - ) - else: - self.lin_src = Linear( - in_channels[0], - heads * out_channels, - bias=False, - weight_initializer="glorot", - ) - self.lin_dst = Linear( - in_channels[1], - heads * out_channels, - bias=False, - weight_initializer="glorot", - ) - - if edge_dim is not None: - self.lin_edge = Linear( - edge_dim, - heads * out_channels, - bias=False, - weight_initializer="glorot", - ) - self.att = nn.Parameter(torch.Tensor(3 * heads * out_channels)) - else: - self.register_parameter("lin_edge", None) - self.att = nn.Parameter(torch.Tensor(2 * heads * out_channels)) - - if bias and concat: - self.bias = nn.Parameter(torch.Tensor(heads * out_channels)) - elif bias and not concat: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.register_parameter("bias", None) - - 
self.reset_parameters() - - def reset_parameters(self): - if isinstance(self.in_channels, int): - self.lin.reset_parameters() - else: - self.lin_src.reset_parameters() - self.lin_dst.reset_parameters() - - torch_geometric.nn.inits.glorot( - self.att.view(-1, self.heads, self.out_channels) - ) - - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - - torch_geometric.nn.inits.zeros(self.bias) - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - edge_index: Union[torch_geometric.EdgeIndex, CSC], - edge_attr: Optional[torch.Tensor] = None, - max_num_neighbors: Optional[int] = None, - deterministic_dgrad: bool = False, - deterministic_wgrad: bool = False, - high_precision_dgrad: bool = False, - high_precision_wgrad: bool = False, - ) -> torch.Tensor: - r"""Runs the forward pass of the module. - - Args: - x (torch.Tensor or tuple): The node features. Can be a tuple of - tensors denoting source and destination node features. - edge_index (EdgeIndex or CSC): The edge indices. - edge_attr: (torch.Tensor, optional) The edge features. - max_num_neighbors (int, optional): The maximum number of neighbors - of a destination node. When enabled, it allows models to use - the message-flow-graph primitives in cugraph-ops. - (default: :obj:`None`) - deterministic_dgrad : bool, default=False - Optional flag indicating whether the feature gradients - are computed deterministically using a dedicated workspace buffer. - deterministic_wgrad: bool, default=False - Optional flag indicating whether the weight gradients - are computed deterministically using a dedicated workspace buffer. - high_precision_dgrad: bool, default=False - Optional flag indicating whether gradients for inputs in half precision - are kept in single precision as long as possible and only casted to - the corresponding input type at the very end. 
- high_precision_wgrad: bool, default=False - Optional flag indicating whether gradients for weights in half precision - are kept in single precision as long as possible and only casted to - the corresponding input type at the very end. - """ - bipartite = not isinstance(x, torch.Tensor) - graph, perm = self.get_cugraph( - edge_index=edge_index, - bipartite=bipartite, - max_num_neighbors=max_num_neighbors, - ) - - if deterministic_dgrad: - graph.add_reverse_graph() - - if edge_attr is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_dim must be set to accept " - f"edge features." - ) - if edge_attr.dim() == 1: - edge_attr = edge_attr.view(-1, 1) - if perm is not None: - edge_attr = edge_attr[perm] - edge_attr = self.lin_edge(edge_attr) - - if bipartite: - if not hasattr(self, "lin_src"): - raise RuntimeError( - f"{self.__class__.__name__}.in_channels must be a pair of " - f"integers to allow bipartite node features, but got " - f"{self.in_channels}." - ) - x_src = self.lin_src(x[0]) - x_dst = self.lin_dst(x[1]) - else: - if not hasattr(self, "lin"): - raise RuntimeError( - f"{self.__class__.__name__}.in_channels is expected to be an " - f"integer, but got {self.in_channels}." 
- ) - x = self.lin(x) - - out = mha_gat_n2n( - (x_src, x_dst) if bipartite else x, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - deterministic_dgrad=deterministic_dgrad, - deterministic_wgrad=deterministic_wgrad, - high_precision_dgrad=high_precision_dgrad, - high_precision_wgrad=high_precision_wgrad, - ) - - if self.bias is not None: - out = out + self.bias - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, heads={self.heads})" - ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py deleted file mode 100644 index ebb30de9754..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import mha_gat_v2_n2n - -from .base import BaseConv, CSC - -torch = import_optional("torch") -nn = import_optional("torch.nn") -torch_geometric = import_optional("torch_geometric") - - -class GATv2Conv(BaseConv): - r"""The GATv2 operator from the `"How Attentive are Graph Attention - Networks?" 
`_ paper, which fixes the - static attention problem of the standard - :class:`~torch_geometric.conv.GATConv` layer. - Since the linear layers in the standard GAT are applied right after each - other, the ranking of attended nodes is unconditioned on the query node. - In contrast, in :class:`GATv2`, every node can attend to any other node. - - .. math:: - \mathbf{x}^{\prime}_i = \alpha_{i,i}\mathbf{\Theta}\mathbf{x}_{i} + - \sum_{j \in \mathcal{N}(i)} \alpha_{i,j}\mathbf{\Theta}\mathbf{x}_{j}, - - where the attention coefficients :math:`\alpha_{i,j}` are computed as - - .. math:: - \alpha_{i,j} = - \frac{ - \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} - [\mathbf{x}_i \, \Vert \, \mathbf{x}_j] - \right)\right)} - {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} - \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} - [\mathbf{x}_i \, \Vert \, \mathbf{x}_k] - \right)\right)}. - - If the graph has multi-dimensional edge features :math:`\mathbf{e}_{i,j}`, - the attention coefficients :math:`\alpha_{i,j}` are computed as - - .. math:: - \alpha_{i,j} = - \frac{ - \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} - [\mathbf{x}_i \, \Vert \, \mathbf{x}_j \, \Vert \, \mathbf{e}_{i,j}] - \right)\right)} - {\sum_{k \in \mathcal{N}(i) \cup \{ i \}} - \exp\left(\mathbf{a}^{\top}\mathrm{LeakyReLU}\left(\mathbf{\Theta} - [\mathbf{x}_i \, \Vert \, \mathbf{x}_k \, \Vert \, \mathbf{e}_{i,k}] - \right)\right)}. - - Args: - in_channels (int or tuple): Size of each input sample, or :obj:`-1` to - derive the size from the first input(s) to the forward method. - A tuple corresponds to the sizes of source and target - dimensionalities. - out_channels (int): Size of each output sample. - heads (int, optional): Number of multi-head-attentions. - (default: :obj:`1`) - concat (bool, optional): If set to :obj:`False`, the multi-head - attentions are averaged instead of concatenated. 
- (default: :obj:`True`) - negative_slope (float, optional): LeakyReLU angle of the negative - slope. (default: :obj:`0.2`) - edge_dim (int, optional): Edge feature dimensionality (in case - there are any). (default: :obj:`None`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - share_weights (bool, optional): If set to :obj:`True`, the same matrix - will be applied to the source and the target node of every edge. - (default: :obj:`False`) - """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - heads: int = 1, - concat: bool = True, - negative_slope: float = 0.2, - edge_dim: Optional[int] = None, - bias: bool = True, - share_weights: bool = False, - ): - super().__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.heads = heads - self.concat = concat - self.negative_slope = negative_slope - self.edge_dim = edge_dim - self.share_weights = share_weights - - Linear = torch_geometric.nn.Linear - - if isinstance(in_channels, int): - self.lin_src = Linear( - in_channels, - heads * out_channels, - bias=bias, - weight_initializer="glorot", - ) - - if share_weights: - self.lin_dst = self.lin_src - else: - self.lin_dst = Linear( - in_channels, - heads * out_channels, - bias=bias, - weight_initializer="glorot", - ) - else: - self.lin_src = Linear( - in_channels[0], - heads * out_channels, - bias=bias, - weight_initializer="glorot", - ) - self.lin_dst = Linear( - in_channels[1], - heads * out_channels, - bias=bias, - weight_initializer="glorot", - ) - - self.att = nn.Parameter(torch.Tensor(heads * out_channels)) - - if edge_dim is not None: - self.lin_edge = Linear( - edge_dim, heads * out_channels, bias=False, weight_initializer="glorot" - ) - else: - self.register_parameter("lin_edge", None) - - if bias and concat: - self.bias = nn.Parameter(torch.Tensor(heads * out_channels)) - elif bias and not concat: - self.bias = 
nn.Parameter(torch.Tensor(out_channels)) - else: - self.register_parameter("bias", None) - - self.reset_parameters() - - def reset_parameters(self): - self.lin_src.reset_parameters() - self.lin_dst.reset_parameters() - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - - torch_geometric.nn.inits.glorot( - self.att.view(-1, self.heads, self.out_channels) - ) - - torch_geometric.nn.inits.zeros(self.bias) - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - edge_index: Union[torch_geometric.EdgeIndex, CSC], - edge_attr: Optional[torch.Tensor] = None, - deterministic_dgrad: bool = False, - deterministic_wgrad: bool = False, - ) -> torch.Tensor: - r"""Runs the forward pass of the module. - - Args: - x (torch.Tensor or tuple): The node features. Can be a tuple of - tensors denoting source and destination node features. - edge_index (EdgeIndex or CSC): The edge indices. - edge_attr: (torch.Tensor, optional) The edge features. - deterministic_dgrad : bool, default=False - Optional flag indicating whether the feature gradients - are computed deterministically using a dedicated workspace buffer. - deterministic_wgrad: bool, default=False - Optional flag indicating whether the weight gradients - are computed deterministically using a dedicated workspace buffer. - """ - bipartite = not isinstance(x, torch.Tensor) or not self.share_weights - graph, perm = self.get_cugraph(edge_index, bipartite=bipartite) - if deterministic_dgrad: - graph.add_reverse_graph() - - if edge_attr is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_dim must be set to accept " - f"edge features." 
- ) - if edge_attr.dim() == 1: - edge_attr = edge_attr.view(-1, 1) - if perm is not None: - edge_attr = edge_attr[perm] - edge_attr = self.lin_edge(edge_attr) - - if bipartite: - if isinstance(x, torch.Tensor): - x = (x, x) - x_src = self.lin_src(x[0]) - x_dst = self.lin_dst(x[1]) - else: - x = self.lin_src(x) - - out = mha_gat_v2_n2n( - (x_src, x_dst) if bipartite else x, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - deterministic_dgrad=deterministic_dgrad, - deterministic_wgrad=deterministic_wgrad, - ) - - if self.bias is not None: - out = out + self.bias - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, heads={self.heads})" - ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py deleted file mode 100644 index a73dd8e57ff..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Optional, Union -from collections import defaultdict - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import mha_gat_n2n - -from .base import BaseConv -from cugraph_pyg.utils.imports import package_available - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -class HeteroGATConv(BaseConv): - r"""The graph attentional operator on heterogeneous graphs, where a separate - `GATConv` is applied on the homogeneous graph for each edge type. Compared - with directly wrapping `GATConv`s with `HeteroConv`, `HeteroGATConv` fuses - all the linear transformation associated with each node type together into 1 - GEMM call, to improve the performance on GPUs. - - Parameters - ---------- - in_channels : int or Dict[str, int]) - Size of each input sample of every node type. - - out_channels : int - Size of each output sample. - - node_types : List[str] - List of Node types. - - edge_types : List[Tuple[str, str, str]] - List of Edge types. - - heads : int, optional (default=1) - Number of multi-head-attentions. - - concat : bool, optional (default=True): - If set to :obj:`False`, the multi-head attentions are averaged instead - of concatenated. - - negative_slope : float, optional (default=0.2) - LeakyReLU angle of the negative slope. - - bias : bool, optional (default=True) - If set to :obj:`False`, the layer will not learn an additive bias. - - aggr : str, optional (default="sum") - The aggregation scheme to use for grouping node embeddings generated by - different relations. Choose from "sum", "mean", "min", "max". 
- """ - - def __init__( - self, - in_channels: Union[int, dict[str, int]], - out_channels: int, - node_types: list[str], - edge_types: list[tuple[str, str, str]], - heads: int = 1, - concat: bool = True, - negative_slope: float = 0.2, - bias: bool = True, - aggr: str = "sum", - ): - if not package_available("torch_geometric>=2.4.0"): - raise RuntimeError( - f"{self.__class__.__name__} requires torch_geometric>=2.4.0." - ) - - super().__init__() - - if isinstance(in_channels, int): - in_channels = dict.fromkeys(node_types, in_channels) - self.in_channels = in_channels - self.out_channels = out_channels - - self.node_types = node_types - self.edge_types = edge_types - self.num_heads = heads - self.concat_heads = concat - - self.negative_slope = negative_slope - self.aggr = aggr - - self.relations_per_ntype = defaultdict(lambda: ([], [])) - - lin_weights = dict.fromkeys(self.node_types) - attn_weights = dict.fromkeys(self.edge_types) - biases = dict.fromkeys(self.edge_types) - - ParameterDict = torch_geometric.nn.parameter_dict.ParameterDict - - for edge_type in self.edge_types: - src_type, _, dst_type = edge_type - self.relations_per_ntype[src_type][0].append(edge_type) - if src_type != dst_type: - self.relations_per_ntype[dst_type][1].append(edge_type) - - attn_weights[edge_type] = torch.empty( - 2 * self.num_heads * self.out_channels - ) - - if bias and concat: - biases[edge_type] = torch.empty(self.num_heads * out_channels) - elif bias: - biases[edge_type] = torch.empty(out_channels) - else: - biases[edge_type] = None - - for ntype in self.node_types: - n_src_rel = len(self.relations_per_ntype[ntype][0]) - n_dst_rel = len(self.relations_per_ntype[ntype][1]) - n_rel = n_src_rel + n_dst_rel - - lin_weights[ntype] = torch.empty( - (n_rel * self.num_heads * self.out_channels, self.in_channels[ntype]) - ) - - self.lin_weights = ParameterDict(lin_weights) - self.attn_weights = ParameterDict(attn_weights) - - if bias: - self.bias = ParameterDict(biases) - else: - 
self.register_parameter("bias", None) - - self.reset_parameters() - - def split_tensors( - self, x_fused_dict: dict[str, torch.Tensor], dim: int - ) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]: - """Split fused tensors into chunks based on edge types. - - Parameters - ---------- - x_fused_dict : dict[str, torch.Tensor] - A dictionary to hold node feature for each node type. The key is - node type; the value is a fused tensor that account for all - relations for that node type. - - dim : int - Dimension along which to split the fused tensor. - - Returns - ------- - x_src_dict : dict[str, torch.Tensor] - A dictionary to hold source node feature for each relation graph. - - x_dst_dict : dict[str, torch.Tensor] - A dictionary to hold destination node feature for each relation graph. - """ - x_src_dict = dict.fromkeys(self.edge_types) - x_dst_dict = dict.fromkeys(self.edge_types) - - for ntype, t in x_fused_dict.items(): - n_src_rel = len(self.relations_per_ntype[ntype][0]) - n_dst_rel = len(self.relations_per_ntype[ntype][1]) - n_rel = n_src_rel + n_dst_rel - t_list = torch.chunk(t, chunks=n_rel, dim=dim) - - for i, src_rel in enumerate(self.relations_per_ntype[ntype][0]): - x_src_dict[src_rel] = t_list[i] - - for i, dst_rel in enumerate(self.relations_per_ntype[ntype][1]): - x_dst_dict[dst_rel] = t_list[i + n_src_rel] - - return x_src_dict, x_dst_dict - - def reset_parameters(self, seed: Optional[int] = None): - if seed is not None: - torch.manual_seed(seed) - - w_src, w_dst = self.split_tensors(self.lin_weights, dim=0) - - for edge_type in self.edge_types: - src_type, _, dst_type = edge_type - - # lin_src - torch_geometric.nn.inits.glorot(w_src[edge_type]) - - # lin_dst - if src_type != dst_type: - torch_geometric.nn.inits.glorot(w_dst[edge_type]) - - # attn_weights - torch_geometric.nn.inits.glorot( - self.attn_weights[edge_type].view(-1, self.num_heads, self.out_channels) - ) - - # bias - if self.bias is not None: - 
torch_geometric.nn.inits.zeros(self.bias[edge_type]) - - def forward( - self, - x_dict: dict[str, torch.Tensor], - edge_index_dict: dict[tuple[str, str, str], torch.Tensor], - ) -> dict[str, torch.Tensor]: - feat_dict = dict.fromkeys(x_dict.keys()) - - for ntype, x in x_dict.items(): - feat_dict[ntype] = x @ self.lin_weights[ntype].T - - x_src_dict, x_dst_dict = self.split_tensors(feat_dict, dim=1) - - out_dict = defaultdict(list) - - for edge_type, edge_index in edge_index_dict.items(): - src_type, _, dst_type = edge_type - - csc = BaseConv.to_csc( - edge_index, (x_dict[src_type].size(0), x_dict[dst_type].size(0)) - ) - - if src_type == dst_type: - graph, _ = self.get_cugraph( - csc, - bipartite=False, - ) - out = mha_gat_n2n( - x_src_dict[edge_type], - self.attn_weights[edge_type], - graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat_heads, - ) - - else: - graph, _ = self.get_cugraph( - csc, - bipartite=True, - ) - out = mha_gat_n2n( - (x_src_dict[edge_type], x_dst_dict[edge_type]), - self.attn_weights[edge_type], - graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat_heads, - ) - - if self.bias is not None: - out = out + self.bias[edge_type] - - out_dict[dst_type].append(out) - - for key, value in out_dict.items(): - out_dict[key] = torch_geometric.nn.conv.hetero_conv.group(value, self.aggr) - - return out_dict diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py deleted file mode 100644 index 13fa08db5c5..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import agg_hg_basis_n2n_post - -from .base import BaseConv, CSC - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -class RGCNConv(BaseConv): # pragma: no cover - r"""The relational graph convolutional operator from the `"Modeling - Relational Data with Graph Convolutional Networks" - `_ paper. - - .. math:: - \mathbf{x}^{\prime}_i = \mathbf{\Theta}_{\textrm{root}} \cdot - \mathbf{x}_i + \sum_{r \in \mathcal{R}} \sum_{j \in \mathcal{N}_r(i)} - \frac{1}{|\mathcal{N}_r(i)|} \mathbf{\Theta}_r \cdot \mathbf{x}_j, - - where :math:`\mathcal{R}` denotes the set of relations, *i.e.* edge types. - Edge type needs to be a one-dimensional :obj:`torch.long` tensor which - stores a relation identifier - :math:`\in \{ 0, \ldots, |\mathcal{R}| - 1\}` for each edge. - - Args: - in_channels (int): Size of each input sample. - out_channels (int): Size of each output sample. - num_relations (int): Number of relations. - num_bases (int, optional): If set, this layer will use the - basis-decomposition regularization scheme where :obj:`num_bases` - denotes the number of bases to use. (default: :obj:`None`) - aggr (str, optional): The aggregation scheme to use - (:obj:`"add"`, :obj:`"mean"`, :obj:`"sum"`). - (default: :obj:`"mean"`) - root_weight (bool, optional): If set to :obj:`False`, the layer will - not add transformed root node features to the output. 
- (default: :obj:`True`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - """ - - def __init__( - self, - in_channels: int, - out_channels: int, - num_relations: int, - num_bases: Optional[int] = None, - aggr: str = "mean", - root_weight: bool = True, - bias: bool = True, - ): - super().__init__() - - if aggr not in ["mean", "sum", "add"]: - raise ValueError( - f"Aggregation function must be chosen from 'mean', 'sum' or " - f"'add', but got '{aggr}'." - ) - - self.in_channels = in_channels - self.out_channels = out_channels - self.num_relations = num_relations - self.num_bases = num_bases - self.aggr = aggr - self.root_weight = root_weight - - dim_root_weight = 1 if root_weight else 0 - - if num_bases is not None: - self.weight = torch.nn.Parameter( - torch.empty(num_bases + dim_root_weight, in_channels, out_channels) - ) - self.comp = torch.nn.Parameter(torch.empty(num_relations, num_bases)) - else: - self.weight = torch.nn.Parameter( - torch.empty(num_relations + dim_root_weight, in_channels, out_channels) - ) - self.register_parameter("comp", None) - - if bias: - self.bias = torch.nn.Parameter(torch.empty(out_channels)) - else: - self.register_parameter("bias", None) - - self.reset_parameters() - - def reset_parameters(self): - end = -1 if self.root_weight else None - torch_geometric.nn.inits.glorot(self.weight[:end]) - torch_geometric.nn.inits.glorot(self.comp) - if self.root_weight: - torch_geometric.nn.inits.glorot(self.weight[-1]) - torch_geometric.nn.inits.zeros(self.bias) - - def forward( - self, - x: torch.Tensor, - edge_index: Union[torch_geometric.EdgeIndex, CSC], - edge_type: torch.Tensor, - max_num_neighbors: Optional[int] = None, - ) -> torch.Tensor: - - graph, _ = self.get_typed_cugraph( - edge_index, - edge_type, - self.num_relations, - max_num_neighbors=max_num_neighbors, - ) - - out = agg_hg_basis_n2n_post( - x, - self.comp, - graph, - concat_own=self.root_weight, - 
norm_by_out_degree=bool(self.aggr == "mean"), - ) - - out = out @ self.weight.view(-1, self.out_channels) - - if self.bias is not None: - out = out + self.bias - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, num_relations={self.num_relations})" - ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py deleted file mode 100644 index 65dc99d8988..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import agg_concat_n2n - -from .base import BaseConv, CSC - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -class SAGEConv(BaseConv): - r"""The GraphSAGE operator from the `"Inductive Representation Learning on - Large Graphs" `_ paper. - - .. math:: - \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W}_2 \cdot - \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j - - If :obj:`project = True`, then :math:`\mathbf{x}_j` will first get - projected via - - .. math:: - \mathbf{x}_j \leftarrow \sigma ( \mathbf{W}_3 \mathbf{x}_j + - \mathbf{b}) - - as described in Eq. (3) of the paper. 
- - Args: - in_channels (int or tuple): Size of each input sample. A tuple - corresponds to the sizes of source and target dimensionalities. - out_channels (int): Size of each output sample. - aggr (str or Aggregation, optional): The aggregation scheme to use. - Choose from :obj:`"mean"`, :obj:`"sum"`, :obj:`"min"` or - :obj:`"max"`. (default: :obj:`"mean"`) - normalize (bool, optional): If set to :obj:`True`, output features - will be :math:`\ell_2`-normalized, *i.e.*, - :math:`\frac{\mathbf{h}_i^{k+1}} - {\| \mathbf{h}_i^{k+1} \|_2}`. - (default: :obj:`False`) - root_weight (bool, optional): If set to :obj:`False`, the layer will - not add transformed root node features to the output. - (default: :obj:`True`) - project (bool, optional): If set to :obj:`True`, the layer will apply a - linear transformation followed by an activation function before - aggregation (as described in Eq. (3) of the paper). - (default: :obj:`False`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - aggr: str = "mean", - normalize: bool = False, - root_weight: bool = True, - project: bool = False, - bias: bool = True, - ): - super().__init__() - - if aggr not in ["mean", "sum", "min", "max"]: - raise ValueError( - f"Aggregation function must be chosen from 'mean'," - f" 'sum', 'min' or 'max', but got '{aggr}'." 
- ) - - self.in_channels = in_channels - self.out_channels = out_channels - self.aggr = aggr - self.normalize = normalize - self.root_weight = root_weight - self.project = project - - if isinstance(in_channels, int): - self.in_channels_src = self.in_channels_dst = in_channels - else: - self.in_channels_src, self.in_channels_dst = in_channels - - if self.project: - self.pre_lin = torch_geometric.nn.Linear( - self.in_channels_src, self.in_channels_src, bias=True - ) - - if self.root_weight: - self.lin = torch_geometric.nn.Linear( - self.in_channels_src + self.in_channels_dst, out_channels, bias=bias - ) - else: - self.lin = torch_geometric.nn.Linear( - self.in_channels_src, out_channels, bias=bias - ) - - self.reset_parameters() - - def reset_parameters(self): - if self.project: - self.pre_lin.reset_parameters() - self.lin.reset_parameters() - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - edge_index: Union[torch_geometric.EdgeIndex, CSC], - max_num_neighbors: Optional[int] = None, - ) -> torch.Tensor: - bipartite = isinstance(x, Tuple) - graph, _ = self.get_cugraph( - edge_index=edge_index, - bipartite=bipartite, - max_num_neighbors=max_num_neighbors, - ) - - if self.project: - if bipartite: - x = (self.pre_lin(x[0]).relu(), x[1]) - else: - x = self.pre_lin(x).relu() - - out = agg_concat_n2n(x, graph, self.aggr) - - if self.root_weight: - out = self.lin(out) - else: - out = self.lin(out[:, : self.in_channels_src]) - - if self.normalize: - out = torch.nn.functional.normalize(out, p=2.0, dim=-1) - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, aggr={self.aggr})" - ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py deleted file mode 100644 index e184ee0e893..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py +++ /dev/null @@ -1,214 +0,0 @@ 
-# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Tuple, Union - -from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch.operators import mha_simple_n2n - -from .base import BaseConv, CSC - -torch = import_optional("torch") -nn = import_optional("torch.nn") -torch_geometric = import_optional("torch_geometric") - - -class TransformerConv(BaseConv): - r"""The graph transformer operator from the `"Masked Label Prediction: - Unified Message Passing Model for Semi-Supervised Classification" - `_ paper. - - .. math:: - \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + - \sum_{j \in \mathcal{N}(i)} \alpha_{i,j} \mathbf{W}_2 \mathbf{x}_{j}, - - where the attention coefficients :math:`\alpha_{i,j}` are computed via - multi-head dot product attention: - - .. math:: - \alpha_{i,j} = \textrm{softmax} \left( - \frac{(\mathbf{W}_3\mathbf{x}_i)^{\top} (\mathbf{W}_4\mathbf{x}_j)} - {\sqrt{d}} \right) - - Args: - in_channels (int or tuple): Size of each input sample, or :obj:`-1` to - derive the size from the first input(s) to the forward method. - A tuple corresponds to the sizes of source and target - dimensionalities. - out_channels (int): Size of each output sample. - heads (int, optional): Number of multi-head-attentions. - (default: :obj:`1`) - concat (bool, optional): If set to :obj:`False`, the multi-head - attentions are averaged instead of concatenated. 
- (default: :obj:`True`) - beta (bool, optional): If set, will combine aggregation and - skip information via - - .. math:: - \mathbf{x}^{\prime}_i = \beta_i \mathbf{W}_1 \mathbf{x}_i + - (1 - \beta_i) \underbrace{\left(\sum_{j \in \mathcal{N}(i)} - \alpha_{i,j} \mathbf{W}_2 \vec{x}_j \right)}_{=\mathbf{m}_i} - - with :math:`\beta_i = \textrm{sigmoid}(\mathbf{w}_5^{\top} - [ \mathbf{W}_1 \mathbf{x}_i, \mathbf{m}_i, \mathbf{W}_1 - \mathbf{x}_i - \mathbf{m}_i ])` (default: :obj:`False`) - edge_dim (int, optional): Edge feature dimensionality (in case - there are any). Edge features are added to the keys after - linear transformation, that is, prior to computing the - attention dot product. They are also added to final values - after the same linear transformation. The model is: - - .. math:: - \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + - \sum_{j \in \mathcal{N}(i)} \alpha_{i,j} \left( - \mathbf{W}_2 \mathbf{x}_{j} + \mathbf{W}_6 \mathbf{e}_{ij} - \right), - - where the attention coefficients :math:`\alpha_{i,j}` are now - computed via: - - .. math:: - \alpha_{i,j} = \textrm{softmax} \left( - \frac{(\mathbf{W}_3\mathbf{x}_i)^{\top} - (\mathbf{W}_4\mathbf{x}_j + \mathbf{W}_6 \mathbf{e}_{ij})} - {\sqrt{d}} \right) - - (default :obj:`None`) - bias (bool, optional): If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) - root_weight (bool, optional): If set to :obj:`False`, the layer will - not add the transformed root node features to the output and the - option :attr:`beta` is set to :obj:`False`. 
(default: :obj:`True`) - """ - - def __init__( - self, - in_channels: Union[int, Tuple[int, int]], - out_channels: int, - heads: int = 1, - concat: bool = True, - beta: bool = False, - edge_dim: Optional[int] = None, - bias: bool = True, - root_weight: bool = True, - ): - super().__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.heads = heads - self.beta = beta and root_weight - self.root_weight = root_weight - self.concat = concat - self.edge_dim = edge_dim - - if isinstance(in_channels, int): - in_channels = (in_channels, in_channels) - - Linear = torch_geometric.nn.Linear - - self.lin_key = Linear(in_channels[0], heads * out_channels) - self.lin_query = Linear(in_channels[1], heads * out_channels) - self.lin_value = Linear(in_channels[0], heads * out_channels) - if edge_dim is not None: - self.lin_edge = Linear(edge_dim, heads * out_channels, bias=False) - else: - self.lin_edge = self.register_parameter("lin_edge", None) - - if concat: - self.lin_skip = Linear(in_channels[1], heads * out_channels, bias=bias) - if self.beta: - self.lin_beta = Linear(3 * heads * out_channels, 1, bias=False) - else: - self.lin_beta = self.register_parameter("lin_beta", None) - else: - self.lin_skip = Linear(in_channels[1], out_channels, bias=bias) - if self.beta: - self.lin_beta = Linear(3 * out_channels, 1, bias=False) - else: - self.lin_beta = self.register_parameter("lin_beta", None) - - self.reset_parameters() - - def reset_parameters(self): - self.lin_key.reset_parameters() - self.lin_query.reset_parameters() - self.lin_value.reset_parameters() - if self.lin_edge is not None: - self.lin_edge.reset_parameters() - self.lin_skip.reset_parameters() - if self.lin_beta is not None: - self.lin_beta.reset_parameters() - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - edge_index: Union[torch_geometric.EdgeIndex, CSC], - edge_attr: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - r"""Runs the forward pass 
of the module. - - Args: - x (torch.Tensor or tuple): The node features. Can be a tuple of - tensors denoting source and destination node features. - edge_index (EdgeIndex or CSC): The edge indices. - edge_attr: (torch.Tensor, optional) The edge features. - """ - bipartite = True - graph, perm = self.get_cugraph(edge_index=edge_index, bipartite=bipartite) - - if isinstance(x, torch.Tensor): - x = (x, x) - - query = self.lin_query(x[1]) - key = self.lin_key(x[0]) - value = self.lin_value(x[0]) - - if edge_attr is not None: - if self.lin_edge is None: - raise RuntimeError( - f"{self.__class__.__name__}.edge_dim must be set to accept " - f"edge features." - ) - if perm is not None: - edge_attr = edge_attr[perm] - edge_attr = self.lin_edge(edge_attr) - - out = mha_simple_n2n( - key, - query, - value, - graph, - self.heads, - self.concat, - edge_emb=edge_attr, - norm_by_dim=True, - score_bias=None, - ) - - if self.root_weight: - x_r = self.lin_skip(x[1]) - if self.lin_beta is not None: - beta = self.lin_beta(torch.cat([out, x_r, out - x_r], dim=-1)) - beta = beta.sigmoid() - out = beta * x_r + (1 - beta) * out - else: - out = out + x_r - - return out - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}({self.in_channels}, " - f"{self.out_channels}, heads={self.heads})" - ) diff --git a/python/cugraph-pyg/cugraph_pyg/sampler/__init__.py b/python/cugraph-pyg/cugraph_pyg/sampler/__init__.py deleted file mode 100644 index 34fe9c4463e..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/sampler/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from cugraph_pyg.sampler.sampler import BaseSampler, SampleIterator diff --git a/python/cugraph-pyg/cugraph_pyg/sampler/sampler.py b/python/cugraph-pyg/cugraph_pyg/sampler/sampler.py deleted file mode 100644 index bc3d4fd8d3c..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/sampler/sampler.py +++ /dev/null @@ -1,540 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Optional, Iterator, Union, Dict, Tuple - -from cugraph.utilities.utils import import_optional -from cugraph.gnn import DistSampler - -from .sampler_utils import filter_cugraph_pyg_store, neg_sample, neg_cat - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -class SampleIterator: - """ - Iterator that combines output graphs with their - features to produce final output minibatches - that can be fed into a GNN model. 
- """ - - def __init__( - self, - data: Tuple[ - "torch_geometric.data.FeatureStore", "torch_geometric.data.GraphStore" - ], - output_iter: Iterator[ - Union[ - "torch_geometric.sampler.HeteroSamplerOutput", - "torch_geometric.sampler.SamplerOutput", - ] - ], - ): - """ - Constructs a new SampleIterator - - Parameters - ---------- - data: Tuple[torch_geometric.data.FeatureStore, torch_geometric.data.GraphStore] - The original graph that samples were generated from, as a - FeatureStore/GraphStore tuple. - output_iter: Iterator[Union["torch_geometric.sampler.HeteroSamplerOutput", - "torch_geometric.sampler.SamplerOutput"]] - An iterator over outputted sampling results. - """ - self.__feature_store, self.__graph_store = data - self.__output_iter = output_iter - - def __next__(self): - next_sample = next(self.__output_iter) - if isinstance(next_sample, torch_geometric.sampler.SamplerOutput): - sz = next_sample.edge.numel() - if sz == next_sample.col.numel() and ( - next_sample.node.numel() > next_sample.col[-1] - ): - # This will only trigger on very small batches and will have minimal - # performance impact. If COO output is removed, then this condition - # can be avoided. 
- col = next_sample.col - else: - col = torch_geometric.edge_index.ptr2index( - next_sample.col, next_sample.edge.numel() - ) - - data = filter_cugraph_pyg_store( - self.__feature_store, - self.__graph_store, - next_sample.node, - next_sample.row, - col, - next_sample.edge, - None, - ) - - """ - # TODO Re-enable this once PyG resolves - # the issue with edge features (9566) - data = torch_geometric.loader.utils.filter_custom_store( - self.__feature_store, - self.__graph_store, - next_sample.node, - next_sample.row, - col, - next_sample.edge, - None, - ) - """ - - if "n_id" not in data: - data.n_id = next_sample.node - if next_sample.edge is not None and "e_id" not in data: - edge = next_sample.edge.to(torch.long) - data.e_id = edge - - data.batch = next_sample.batch - data.num_sampled_nodes = next_sample.num_sampled_nodes - data.num_sampled_edges = next_sample.num_sampled_edges - - data.input_id = next_sample.metadata[0] - data.batch_size = data.input_id.size(0) - - if len(next_sample.metadata) == 2: - data.seed_time = next_sample.metadata[1] - elif len(next_sample.metadata) == 4: - ( - data.edge_label_index, - data.edge_label, - data.seed_time, - ) = next_sample.metadata[1:] - else: - raise ValueError("Invalid metadata") - - elif isinstance(next_sample, torch_geometric.sampler.HeteroSamplerOutput): - col = {} - for edge_type, col_idx in next_sample.col: - sz = next_sample.edge[edge_type].numel() - if sz == col_idx.numel(): - col[edge_type] = col_idx - else: - col[edge_type] = torch_geometric.edge_index.ptr2index(col_idx, sz) - - data = torch_geometric.loader.utils.filter_custom_hetero_store( - self.__feature_store, - self.__graph_store, - next_sample.node, - next_sample.row, - col, - next_sample.edge, - None, - ) - - for key, node in next_sample.node.items(): - if "n_id" not in data[key]: - data[key].n_id = node - - for key, edge in (next_sample.edge or {}).items(): - if edge is not None and "e_id" not in data[key]: - edge = edge.to(torch.long) - data[key].e_id = 
edge - - data.set_value_dict("batch", next_sample.batch) - data.set_value_dict("num_sampled_nodes", next_sample.num_sampled_nodes) - data.set_value_dict("num_sampled_edges", next_sample.num_sampled_edges) - - # TODO figure out how to set input_id for heterogeneous output - else: - raise ValueError("Invalid output type") - - return data - - def __iter__(self): - return self - - -class SampleReader: - """ - Iterator that processes results from the cuGraph distributed sampler. - """ - - def __init__( - self, base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]] - ): - """ - Constructs a new SampleReader. - - Parameters - ---------- - base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]] - The reader responsible for loading saved samples produced by - the cuGraph distributed sampler. - """ - self.__base_reader = base_reader - self.__num_samples_remaining = 0 - self.__index = 0 - - def __next__(self): - if self.__num_samples_remaining == 0: - # raw_sample_data is already a dict of tensors - self.__raw_sample_data, start_inclusive, end_inclusive = next( - self.__base_reader - ) - - self.__raw_sample_data["input_offsets"] -= self.__raw_sample_data[ - "input_offsets" - ][0].clone() - self.__raw_sample_data["label_hop_offsets"] -= self.__raw_sample_data[ - "label_hop_offsets" - ][0].clone() - self.__raw_sample_data["renumber_map_offsets"] -= self.__raw_sample_data[ - "renumber_map_offsets" - ][0].clone() - if "major_offsets" in self.__raw_sample_data: - self.__raw_sample_data["major_offsets"] -= self.__raw_sample_data[ - "major_offsets" - ][0].clone() - - self.__num_samples_remaining = end_inclusive - start_inclusive + 1 - self.__index = 0 - - out = self._decode(self.__raw_sample_data, self.__index) - self.__index += 1 - self.__num_samples_remaining -= 1 - return out - - def __iter__(self): - return self - - -class HomogeneousSampleReader(SampleReader): - """ - Subclass of SampleReader that reads homogeneous output samples - produced by the cuGraph 
distributed sampler. - """ - - def __init__( - self, base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]] - ): - """ - Constructs a new HomogeneousSampleReader - - Parameters - ---------- - base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]] - The iterator responsible for loading saved samples produced by - the cuGraph distributed sampler. - """ - super().__init__(base_reader) - - def __decode_csc(self, raw_sample_data: Dict[str, "torch.Tensor"], index: int): - fanout_length = (raw_sample_data["label_hop_offsets"].numel() - 1) // ( - raw_sample_data["renumber_map_offsets"].numel() - 1 - ) - - major_offsets_start_incl = raw_sample_data["label_hop_offsets"][ - index * fanout_length - ] - major_offsets_end_incl = raw_sample_data["label_hop_offsets"][ - (index + 1) * fanout_length - ] - - major_offsets = raw_sample_data["major_offsets"][ - major_offsets_start_incl : major_offsets_end_incl + 1 - ].clone() - minors = raw_sample_data["minors"][major_offsets[0] : major_offsets[-1]] - edge_id = raw_sample_data["edge_id"][major_offsets[0] : major_offsets[-1]] - # don't retrieve edge type for a homogeneous graph - - major_offsets -= major_offsets[0].clone() - - renumber_map_start = raw_sample_data["renumber_map_offsets"][index] - renumber_map_end = raw_sample_data["renumber_map_offsets"][index + 1] - - renumber_map = raw_sample_data["map"][renumber_map_start:renumber_map_end] - - current_label_hop_offsets = raw_sample_data["label_hop_offsets"][ - index * fanout_length : (index + 1) * fanout_length + 1 - ].clone() - current_label_hop_offsets -= current_label_hop_offsets[0].clone() - - num_sampled_edges = major_offsets[current_label_hop_offsets].diff() - - num_sampled_nodes_hops = torch.tensor( - [ - minors[: num_sampled_edges[:i].sum()].max() + 1 - for i in range(1, fanout_length + 1) - ], - device="cpu", - ) - - num_seeds = ( - torch.searchsorted(major_offsets, num_sampled_edges[0]).reshape((1,)).cpu() - ) - num_sampled_nodes = torch.concat( - 
[num_seeds, num_sampled_nodes_hops.diff(prepend=num_seeds)] - ) - - input_index = raw_sample_data["input_index"][ - raw_sample_data["input_offsets"][index] : raw_sample_data["input_offsets"][ - index + 1 - ] - ] - - num_seeds = input_index.numel() - input_index = input_index[input_index >= 0] - - num_pos = input_index.numel() - num_neg = num_seeds - num_pos - if num_neg > 0: - edge_label = torch.concat( - [ - torch.full((num_pos,), 1.0), - torch.full((num_neg,), 0.0), - ] - ) - else: - edge_label = None - - edge_inverse = ( - ( - raw_sample_data["edge_inverse"][ - (raw_sample_data["input_offsets"][index] * 2) : ( - raw_sample_data["input_offsets"][index + 1] * 2 - ) - ] - ) - if "edge_inverse" in raw_sample_data - else None - ) - - if edge_inverse is None: - metadata = ( - input_index, - None, # TODO this will eventually include time - ) - else: - metadata = ( - input_index, - edge_inverse.view(2, -1), - edge_label, - None, # TODO this will eventually include time - ) - - return torch_geometric.sampler.SamplerOutput( - node=renumber_map.cpu(), - row=minors, - col=major_offsets, - edge=edge_id.cpu(), - batch=renumber_map[:num_seeds], - num_sampled_nodes=num_sampled_nodes.cpu(), - num_sampled_edges=num_sampled_edges.cpu(), - metadata=metadata, - ) - - def __decode_coo(self, raw_sample_data: Dict[str, "torch.Tensor"], index: int): - fanout_length = (raw_sample_data["label_hop_offsets"].numel() - 1) // ( - raw_sample_data["renumber_map_offsets"].numel() - 1 - ) - - major_minor_start = raw_sample_data["label_hop_offsets"][index * fanout_length] - ix_end = (index + 1) * fanout_length - if ix_end == raw_sample_data["label_hop_offsets"].numel(): - major_minor_end = raw_sample_data["majors"].numel() - else: - major_minor_end = raw_sample_data["label_hop_offsets"][ix_end] - - majors = raw_sample_data["majors"][major_minor_start:major_minor_end] - minors = raw_sample_data["minors"][major_minor_start:major_minor_end] - edge_id = 
raw_sample_data["edge_id"][major_minor_start:major_minor_end] - # don't retrieve edge type for a homogeneous graph - - renumber_map_start = raw_sample_data["renumber_map_offsets"][index] - renumber_map_end = raw_sample_data["renumber_map_offsets"][index + 1] - - renumber_map = raw_sample_data["map"][renumber_map_start:renumber_map_end] - - num_sampled_edges = ( - raw_sample_data["label_hop_offsets"][ - index * fanout_length : (index + 1) * fanout_length + 1 - ] - .diff() - .cpu() - ) - - num_seeds = (majors[: num_sampled_edges[0]].max() + 1).reshape((1,)).cpu() - num_sampled_nodes_hops = torch.tensor( - [ - minors[: num_sampled_edges[:i].sum()].max() + 1 - for i in range(1, fanout_length + 1) - ], - device="cpu", - ) - - num_sampled_nodes = torch.concat( - [num_seeds, num_sampled_nodes_hops.diff(prepend=num_seeds)] - ) - - input_index = raw_sample_data["input_index"][ - raw_sample_data["input_offsets"][index] : raw_sample_data["input_offsets"][ - index + 1 - ] - ] - - edge_inverse = ( - ( - raw_sample_data["edge_inverse"][ - (raw_sample_data["input_offsets"][index] * 2) : ( - raw_sample_data["input_offsets"][index + 1] * 2 - ) - ] - ) - if "edge_inverse" in raw_sample_data - else None - ) - - if edge_inverse is None: - metadata = ( - input_index, - None, # TODO this will eventually include time - ) - else: - metadata = ( - input_index, - edge_inverse.view(2, -1), - None, - None, # TODO this will eventually include time - ) - - return torch_geometric.sampler.SamplerOutput( - node=renumber_map.cpu(), - row=minors, - col=majors, - edge=edge_id, - batch=renumber_map[:num_seeds], - num_sampled_nodes=num_sampled_nodes, - num_sampled_edges=num_sampled_edges, - metadata=metadata, - ) - - def _decode(self, raw_sample_data: Dict[str, "torch.Tensor"], index: int): - if "major_offsets" in raw_sample_data: - return self.__decode_csc(raw_sample_data, index) - else: - return self.__decode_coo(raw_sample_data, index) - - -class BaseSampler: - def __init__( - self, - sampler: 
DistSampler, - data: Tuple[ - "torch_geometric.data.FeatureStore", "torch_geometric.data.GraphStore" - ], - batch_size: int = 16, - ): - self.__sampler = sampler - self.__feature_store, self.__graph_store = data - self.__batch_size = batch_size - - def sample_from_nodes( - self, index: "torch_geometric.sampler.NodeSamplerInput", **kwargs - ) -> Iterator[ - Union[ - "torch_geometric.sampler.HeteroSamplerOutput", - "torch_geometric.sampler.SamplerOutput", - ] - ]: - reader = self.__sampler.sample_from_nodes( - index.node, batch_size=self.__batch_size, input_id=index.input_id, **kwargs - ) - - edge_attrs = self.__graph_store.get_all_edge_attrs() - if ( - len(edge_attrs) == 1 - and edge_attrs[0].edge_type[0] == edge_attrs[0].edge_type[2] - ): - return HomogeneousSampleReader(reader) - else: - # TODO implement heterogeneous sampling - raise NotImplementedError( - "Sampling heterogeneous graphs is currently" - " unsupported in the non-dask API" - ) - - def sample_from_edges( - self, - index: "torch_geometric.sampler.EdgeSamplerInput", - neg_sampling: Optional["torch_geometric.sampler.NegativeSampling"], - **kwargs, - ) -> Iterator[ - Union[ - "torch_geometric.sampler.HeteroSamplerOutput", - "torch_geometric.sampler.SamplerOutput", - ] - ]: - src = index.row - dst = index.col - input_id = index.input_id - neg_batch_size = 0 - if neg_sampling: - # Sample every negative subset at once. 
- # TODO handle temporal sampling (node_time) - src_neg, dst_neg = neg_sample( - self.__graph_store, - index.row, - index.col, - self.__batch_size, - neg_sampling, - None, # src_time, - None, # src_node_time, - ) - if neg_sampling.is_binary(): - src, _ = neg_cat(src.cuda(), src_neg, self.__batch_size) - else: - # triplet, cat dst to src so length is the same; will - # result in the same set of unique vertices - src, _ = neg_cat(src.cuda(), dst_neg, self.__batch_size) - dst, neg_batch_size = neg_cat(dst.cuda(), dst_neg, self.__batch_size) - - # Concatenate -1s so the input id tensor lines up and can - # be processed by the dist sampler. - # When loading the output batch, '-1' will be dropped. - input_id, _ = neg_cat( - input_id, - torch.full( - (dst_neg.numel(),), -1, dtype=torch.int64, device=input_id.device - ), - self.__batch_size, - ) - - # TODO for temporal sampling, node times have to be - # adjusted here. - reader = self.__sampler.sample_from_edges( - torch.stack([src, dst]), # reverse of usual convention - input_id=input_id, - batch_size=self.__batch_size + neg_batch_size, - **kwargs, - ) - - edge_attrs = self.__graph_store.get_all_edge_attrs() - if ( - len(edge_attrs) == 1 - and edge_attrs[0].edge_type[0] == edge_attrs[0].edge_type[2] - ): - return HomogeneousSampleReader(reader) - else: - # TODO implement heterogeneous sampling - raise NotImplementedError( - "Sampling heterogeneous graphs is currently" - " unsupported in the non-dask API" - ) diff --git a/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py b/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py deleted file mode 100644 index b3d56ef9992..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py +++ /dev/null @@ -1,531 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from typing import Sequence, Dict, Tuple - -from math import ceil - -from cugraph_pyg.data import GraphStore, DaskGraphStore - -from cugraph.utilities.utils import import_optional -import cudf -import cupy -import pylibcugraph - -dask_cudf = import_optional("dask_cudf") -torch_geometric = import_optional("torch_geometric") - -torch = import_optional("torch") -HeteroSamplerOutput = torch_geometric.sampler.base.HeteroSamplerOutput - - -def _get_unique_nodes( - sampling_results: cudf.DataFrame, - graph_store: DaskGraphStore, - node_type: str, - node_position: str, -) -> int: - """ - Counts the number of unique nodes of a given node type. - - Parameters - ---------- - sampling_results: cudf.DataFrame - The dataframe containing sampling results or filtered sampling results - (i.e. sampling results for hop 2) - graph_store: DaskGraphStore - The graph store containing the structure of the sampled graph. - node_type: str - The node type to count the number of unique nodes of. - node_position: str ('src' or 'dst') - Whether to examine source or destination nodes. - - Returns - ------- - cudf.Series - The unique nodes of the given node type. 
- """ - if node_position == "src": - edge_index = "majors" - edge_sel = 0 - elif node_position == "dst": - edge_index = "minors" - edge_sel = -1 - else: - raise ValueError(f"Illegal value {node_position} for node_position") - - etypes = [ - graph_store.canonical_edge_type_to_numeric(et) - for et in graph_store.edge_types - if et[edge_sel] == node_type - ] - if len(etypes) > 0: - f = sampling_results.edge_type == etypes[0] - for et in etypes[1:]: - f |= sampling_results.edge_type == et - - sampling_results_node = sampling_results[f] - else: - return cudf.Series([], dtype="int64") - - return sampling_results_node[edge_index] - - -def _sampler_output_from_sampling_results_homogeneous_coo( - sampling_results: cudf.DataFrame, - renumber_map: torch.Tensor, - graph_store: DaskGraphStore, - data_index: Dict[Tuple[int, int], Dict[str, int]], - batch_id: int, - metadata: Sequence = None, -) -> HeteroSamplerOutput: - """ - Parameters - ---------- - sampling_results: cudf.DataFrame - The dataframe containing sampling results. - renumber_map: torch.Tensor - The tensor containing the renumber map, or None if there - is no renumber map. - graph_store: DaskGraphStore - The graph store containing the structure of the sampled graph. - data_index: Dict[Tuple[int, int], Dict[str, int]] - Dictionary where keys are the batch id and hop id, - and values are dictionaries containing the max src - and max dst node ids for the batch and hop. - batch_id: int - The current batch id, whose samples are being retrieved - from the sampling results and data index. - metadata: Tensor - The metadata for the sampled batch. 
- - Returns - ------- - HeteroSamplerOutput - """ - - if len(graph_store.edge_types) > 1 or len(graph_store.node_types) > 1: - raise ValueError("Graph is heterogeneous") - - hops = torch.arange( - sampling_results.hop_id.iloc[len(sampling_results) - 1] + 1, device="cuda" - ) - hops = torch.searchsorted( - torch.as_tensor(sampling_results.hop_id, device="cuda"), hops - ) - - node_type = graph_store.node_types[0] - edge_type = graph_store.edge_types[0] - - num_nodes_per_hop_dict = {node_type: torch.zeros(len(hops) + 1, dtype=torch.int64)} - num_edges_per_hop_dict = {edge_type: torch.zeros(len(hops), dtype=torch.int64)} - - if renumber_map is None: - raise ValueError("Renumbered input is expected for homogeneous graphs") - - noi_index = {node_type: torch.as_tensor(renumber_map, device="cuda")} - - row_dict = { - edge_type: torch.as_tensor(sampling_results.majors, device="cuda"), - } - - col_dict = { - edge_type: torch.as_tensor(sampling_results.minors, device="cuda"), - } - - num_nodes_per_hop_dict[node_type][0] = data_index[batch_id, 0]["src_max"] + 1 - for hop in range(len(hops)): - hop_ix_start = hops[hop] - hop_ix_end = hops[hop + 1] if hop < len(hops) - 1 else len(sampling_results) - - if num_nodes_per_hop_dict[node_type][hop] > 0: - max_id_hop = data_index[batch_id, hop]["dst_max"] - max_id_prev_hop = ( - data_index[batch_id, hop - 1]["dst_max"] - if hop > 0 - else data_index[batch_id, 0]["src_max"] - ) - - if max_id_hop > max_id_prev_hop: - num_nodes_per_hop_dict[node_type][hop + 1] = ( - max_id_hop - max_id_prev_hop - ) - else: - num_nodes_per_hop_dict[node_type][hop + 1] = 0 - # will default to 0 if the previous hop was 0, since this is a PyG requirement - - num_edges_per_hop_dict[edge_type][hop] = hop_ix_end - hop_ix_start - - if HeteroSamplerOutput is None: - raise ImportError("Error importing from pyg") - - return HeteroSamplerOutput( - node=noi_index, - row=row_dict, - col=col_dict, - edge=None, - num_sampled_nodes={k: t.tolist() for k, t in 
num_nodes_per_hop_dict.items()}, - num_sampled_edges={k: t.tolist() for k, t in num_edges_per_hop_dict.items()}, - metadata=metadata, - ) - - -def _sampler_output_from_sampling_results_homogeneous_csr( - major_offsets: torch.Tensor, - minors: torch.Tensor, - renumber_map: torch.Tensor, - graph_store: DaskGraphStore, - label_hop_offsets: torch.Tensor, - batch_id: int, - metadata: Sequence = None, -) -> HeteroSamplerOutput: - """ - Parameters - ---------- - major_offsets: torch.Tensor - The major offsets for the CSC/CSR matrix ("row pointer") - minors: torch.Tensor - The minors for the CSC/CSR matrix ("col index") - renumber_map: torch.Tensor - The tensor containing the renumber map. - Required. - graph_store: DaskGraphStore - The graph store containing the structure of the sampled graph. - label_hop_offsets: torch.Tensor - The tensor containing the label-hop offsets. - batch_id: int - The current batch id, whose samples are being retrieved - from the sampling results and data index. - metadata: Tensor - The metadata for the sampled batch. 
- - Returns - ------- - HeteroSamplerOutput - """ - - if len(graph_store.edge_types) > 1 or len(graph_store.node_types) > 1: - raise ValueError("Graph is heterogeneous") - - if renumber_map is None: - raise ValueError("Renumbered input is expected for homogeneous graphs") - node_type = graph_store.node_types[0] - edge_type = graph_store.edge_types[0] - - major_offsets = major_offsets.clone() - major_offsets[0] - label_hop_offsets = label_hop_offsets.clone() - label_hop_offsets[0] - - num_edges_per_hop_dict = { - edge_type: major_offsets[label_hop_offsets].diff().tolist() - } - - label_hop_offsets = label_hop_offsets.cpu() - num_nodes_per_hop_dict = { - node_type: torch.concat( - [ - label_hop_offsets.diff(), - (renumber_map.shape[0] - label_hop_offsets[-1]).reshape((1,)), - ] - ).tolist() - } - - noi_index = {node_type: torch.as_tensor(renumber_map, device="cuda")} - - col_dict = { - edge_type: major_offsets, - } - - row_dict = { - edge_type: minors, - } - - if HeteroSamplerOutput is None: - raise ImportError("Error importing from pyg") - - return HeteroSamplerOutput( - node=noi_index, - row=row_dict, - col=col_dict, - edge=None, - num_sampled_nodes=num_nodes_per_hop_dict, - num_sampled_edges=num_edges_per_hop_dict, - metadata=metadata, - ) - - -def _sampler_output_from_sampling_results_heterogeneous( - sampling_results: cudf.DataFrame, - renumber_map: cudf.Series, - graph_store: DaskGraphStore, - metadata: Sequence = None, -) -> HeteroSamplerOutput: - """ - Parameters - ---------- - sampling_results: cudf.DataFrame - The dataframe containing sampling results. - renumber_map: cudf.Series - The series containing the renumber map, or None if there - is no renumber map. - graph_store: DaskGraphStore - The graph store containing the structure of the sampled graph. - metadata: Tensor - The metadata for the sampled batch. 
- - Returns - ------- - HeteroSamplerOutput - """ - - hops = torch.arange(sampling_results.hop_id.max() + 1, device="cuda") - hops = torch.searchsorted( - torch.as_tensor(sampling_results.hop_id, device="cuda"), hops - ) - - num_nodes_per_hop_dict = {} - num_edges_per_hop_dict = {} - - # Fill out hop 0 in num_nodes_per_hop_dict, which is based on src instead of dst - sampling_results_hop_0 = sampling_results.iloc[ - 0 : (hops[1] if len(hops) > 1 else len(sampling_results)) - ] - - for node_type in graph_store.node_types: - num_unique_nodes = _get_unique_nodes( - sampling_results_hop_0, graph_store, node_type, "src" - ).nunique() - - if num_unique_nodes > 0: - num_nodes_per_hop_dict[node_type] = torch.zeros( - len(hops) + 1, dtype=torch.int64 - ) - num_nodes_per_hop_dict[node_type][0] = num_unique_nodes - - if renumber_map is not None: - raise ValueError( - "Precomputing the renumber map is currently " - "unsupported for heterogeneous graphs." - ) - - # Calculate nodes of interest based on unique nodes in order of appearance - # Use hop 0 sources since those are the only ones not included in destinations - # Use torch.concat based on benchmark performance (vs. 
cudf.concat) - - if sampling_results_hop_0 is None: - sampling_results_hop_0 = sampling_results.iloc[ - 0 : (hops[1] if len(hops) > 1 else len(sampling_results)) - ] - - nodes_of_interest = ( - cudf.Series( - torch.concat( - [ - torch.as_tensor(sampling_results_hop_0.majors, device="cuda"), - torch.as_tensor(sampling_results.minors, device="cuda"), - ] - ), - name="nodes_of_interest", - ) - .drop_duplicates() - .sort_index() - ) - - # Get the grouped node index (for creating the renumbered grouped edge index) - noi_index = graph_store._get_vertex_groups_from_sample( - torch.as_tensor(nodes_of_interest, device="cuda") - ) - del nodes_of_interest - - # Get the new edge index (by type as expected for HeteroData) - # FIXME handle edge ids/types after the C++ updates - row_dict, col_dict = graph_store._get_renumbered_edge_groups_from_sample( - sampling_results, noi_index - ) - - for hop in range(len(hops)): - hop_ix_start = hops[hop] - hop_ix_end = hops[hop + 1] if hop < len(hops) - 1 else len(sampling_results) - sampling_results_to_hop = sampling_results.iloc[0:hop_ix_end] - - for node_type in graph_store.node_types: - unique_nodes_hop = _get_unique_nodes( - sampling_results_to_hop, graph_store, node_type, "dst" - ) - - unique_nodes_0 = _get_unique_nodes( - sampling_results_hop_0, graph_store, node_type, "src" - ) - - num_unique_nodes = cudf.concat([unique_nodes_0, unique_nodes_hop]).nunique() - - if num_unique_nodes > 0: - if node_type not in num_nodes_per_hop_dict: - num_nodes_per_hop_dict[node_type] = torch.zeros( - len(hops) + 1, dtype=torch.int64 - ) - num_nodes_per_hop_dict[node_type][hop + 1] = num_unique_nodes - int( - num_nodes_per_hop_dict[node_type][: hop + 1].sum(0) - ) - - numeric_etypes, counts = torch.unique( - torch.as_tensor( - sampling_results.iloc[hop_ix_start:hop_ix_end].edge_type, - device="cuda", - ), - return_counts=True, - ) - numeric_etypes = list(numeric_etypes) - counts = list(counts) - for num_etype, count in zip(numeric_etypes, counts): - 
can_etype = graph_store.numeric_edge_type_to_canonical(num_etype) - if can_etype not in num_edges_per_hop_dict: - num_edges_per_hop_dict[can_etype] = torch.zeros( - len(hops), dtype=torch.int64 - ) - num_edges_per_hop_dict[can_etype][hop] = count - - if HeteroSamplerOutput is None: - raise ImportError("Error importing from pyg") - - return HeteroSamplerOutput( - node=noi_index, - row=row_dict, - col=col_dict, - edge=None, - num_sampled_nodes={k: t.tolist() for k, t in num_nodes_per_hop_dict.items()}, - num_sampled_edges={k: t.tolist() for k, t in num_edges_per_hop_dict.items()}, - metadata=metadata, - ) - - -def filter_cugraph_pyg_store( - feature_store, - graph_store, - node, - row, - col, - edge, - clx, -) -> "torch_geometric.data.Data": - data = torch_geometric.data.Data() - - data.edge_index = torch.stack([row, col], dim=0) - - required_attrs = [] - for attr in feature_store.get_all_tensor_attrs(): - attr.index = edge if isinstance(attr.group_name, tuple) else node - required_attrs.append(attr) - data.num_nodes = attr.index.size(0) - - tensors = feature_store.multi_get_tensor(required_attrs) - for i, attr in enumerate(required_attrs): - data[attr.attr_name] = tensors[i] - - return data - - -def neg_sample( - graph_store: GraphStore, - seed_src: "torch.Tensor", - seed_dst: "torch.Tensor", - batch_size: int, - neg_sampling: "torch_geometric.sampler.NegativeSampling", - time: "torch.Tensor", - node_time: "torch.Tensor", -) -> Tuple["torch.Tensor", "torch.Tensor"]: - try: - # Compatibility for PyG 2.5 - src_weight = neg_sampling.src_weight - dst_weight = neg_sampling.dst_weight - except AttributeError: - src_weight = neg_sampling.weight - dst_weight = neg_sampling.weight - unweighted = src_weight is None and dst_weight is None - - # Require at least one negative edge per batch - num_neg = max( - int(ceil(neg_sampling.amount * seed_src.numel())), - int(ceil(seed_src.numel() / batch_size)), - ) - - if graph_store.is_multi_gpu: - num_neg_global = 
torch.tensor([num_neg], device="cuda") - torch.distributed.all_reduce(num_neg_global, op=torch.distributed.ReduceOp.SUM) - num_neg = int(num_neg_global) - else: - num_neg_global = num_neg - - if node_time is None: - result_dict = pylibcugraph.negative_sampling( - graph_store._resource_handle, - graph_store._graph, - num_neg_global, - vertices=None - if unweighted - else cupy.arange(src_weight.numel(), dtype="int64"), - src_bias=None if src_weight is None else cupy.asarray(src_weight), - dst_bias=None if dst_weight is None else cupy.asarray(dst_weight), - remove_duplicates=False, - remove_false_negatives=False, - exact_number_of_samples=True, - do_expensive_check=False, - ) - - src_neg = torch.as_tensor(result_dict["sources"], device="cuda")[:num_neg] - dst_neg = torch.as_tensor(result_dict["destinations"], device="cuda")[:num_neg] - - # TODO modifiy the C API so this condition is impossible - if src_neg.numel() < num_neg: - num_gen = num_neg - src_neg.numel() - src_neg = torch.concat( - [ - src_neg, - torch.randint( - 0, src_neg.max(), (num_gen,), device="cuda", dtype=torch.int64 - ), - ] - ) - dst_neg = torch.concat( - [ - dst_neg, - torch.randint( - 0, dst_neg.max(), (num_gen,), device="cuda", dtype=torch.int64 - ), - ] - ) - return src_neg, dst_neg - raise NotImplementedError( - "Temporal negative sampling is currently unimplemented in cuGraph-PyG" - ) - - -def neg_cat( - seed_pos: "torch.Tensor", seed_neg: "torch.Tensor", pos_batch_size: int -) -> Tuple["torch.Tensor", int]: - num_seeds = seed_pos.numel() - num_batches = int(ceil(num_seeds / pos_batch_size)) - neg_batch_size = int(ceil(seed_neg.numel() / num_batches)) - - batch_pos_offsets = torch.full((num_batches,), pos_batch_size).cumsum(-1)[:-1] - seed_pos_splits = torch.tensor_split(seed_pos, batch_pos_offsets) - - batch_neg_offsets = torch.full((num_batches,), neg_batch_size).cumsum(-1)[:-1] - seed_neg_splits = torch.tensor_split(seed_neg, batch_neg_offsets) - - return ( - torch.concatenate( - 
[torch.concatenate(s) for s in zip(seed_pos_splits, seed_neg_splits)] - ), - neg_batch_size, - ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py deleted file mode 100644 index 30994289f9c..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py +++ /dev/null @@ -1,315 +0,0 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import pytest - -from dask_cuda.initialize import initialize as dask_initialize -from dask_cuda import LocalCUDACluster -from dask.distributed import Client -from cugraph.dask.comms import comms as Comms -from cugraph.dask.common.mg_utils import get_visible_devices -from cugraph.testing.mg_utils import stop_dask_client - -import torch -import numpy as np -from cugraph.gnn import FeatureStore -from cugraph.datasets import karate - -import tempfile - -# module-wide fixtures - -# If the rapids-pytest-benchmark plugin is installed, the "gpubenchmark" -# fixture will be available automatically. Check that this fixture is available -# by trying to import rapids_pytest_benchmark, and if that fails, set -# "gpubenchmark" to the standard "benchmark" fixture provided by -# pytest-benchmark. 
-try: - import rapids_pytest_benchmark # noqa: F401 -except ImportError: - import pytest_benchmark - - gpubenchmark = pytest_benchmark.plugin.benchmark - - -@pytest.fixture(scope="module") -def dask_client(): - dask_scheduler_file = os.environ.get("SCHEDULER_FILE") - cuda_visible_devices = get_visible_devices() - - if dask_scheduler_file is not None: - dask_initialize() - dask_client = Client(scheduler_file=dask_scheduler_file) - else: - # The tempdir created by tempdir_object should be cleaned up once - # tempdir_object goes out-of-scope and is deleted. - tempdir_object = tempfile.TemporaryDirectory() - cluster = LocalCUDACluster( - local_directory=tempdir_object.name, - protocol="tcp", - CUDA_VISIBLE_DEVICES=cuda_visible_devices, - ) - - dask_client = Client(cluster) - dask_client.wait_for_workers(len(cuda_visible_devices)) - - if not Comms.is_initialized(): - Comms.initialize(p2p=True) - - yield dask_client - - stop_dask_client(dask_client) - print("\ndask_client fixture: client.close() called") - - -@pytest.fixture -def karate_gnn(): - el = karate.get_edgelist().reset_index(drop=True) - el.src = el.src.astype("int64") - el.dst = el.dst.astype("int64") - all_vertices = np.array_split(np.arange(34), 2) - - F = FeatureStore(backend="torch") - F.add_data( - torch.arange(len(all_vertices[0]), dtype=torch.float32) * 31, - "type0", - "prop0", - ) - F.add_data( - torch.arange(len(all_vertices[1]), dtype=torch.float32) * 41, - "type1", - "prop0", - ) - - N = { - "type0": len(all_vertices[0]), - "type1": len(all_vertices[1]), - } - - offsets = {"type0": 0, "type1": N["type0"]} - - G = { - ("type0", "et01", "type1"): el[ - el.src.isin(all_vertices[0]) & el.dst.isin(all_vertices[1]) - ].reset_index(drop=True), - ("type1", "et10", "type0"): el[ - el.src.isin(all_vertices[1]) & el.dst.isin(all_vertices[0]) - ].reset_index(drop=True), - ("type0", "et00", "type0"): el[ - el.src.isin(all_vertices[0]) & el.dst.isin(all_vertices[0]) - ], - ("type1", "et11", "type1"): el[ - 
el.src.isin(all_vertices[1]) & el.dst.isin(all_vertices[1]) - ].reset_index(drop=True), - } - - G = { - (src_type, edge_type, dst_type): ( - torch.tensor(elx["src"].values_host - offsets[src_type]), - torch.tensor(elx["dst"].values_host - offsets[dst_type]), - ) - for (src_type, edge_type, dst_type), elx in G.items() - } - - return F, G, N - - -@pytest.fixture -def basic_graph_1(): - G = { - ("vt1", "pig", "vt1"): [ - torch.tensor([0, 0, 1, 2, 2, 3]), - torch.tensor([1, 2, 4, 3, 4, 1]), - ] - } - - N = {"vt1": 5} - - F = FeatureStore() - F.add_data( - torch.tensor([100, 200, 300, 400, 500]), type_name="vt1", feat_name="prop1" - ) - - F.add_data(torch.tensor([5, 4, 3, 2, 1]), type_name="vt1", feat_name="prop2") - - return F, G, N - - -@pytest.fixture -def multi_edge_graph_1(): - G = { - ("vt1", "pig", "vt1"): [torch.tensor([0, 2, 3, 1]), torch.tensor([1, 3, 1, 4])], - ("vt1", "dog", "vt1"): [torch.tensor([0, 3, 4]), torch.tensor([2, 2, 3])], - ("vt1", "cat", "vt1"): [ - torch.tensor([1, 2, 2]), - torch.tensor([4, 3, 4]), - ], - } - - N = {"vt1": 5} - - F = FeatureStore() - F.add_data( - torch.tensor([100, 200, 300, 400, 500]), type_name="vt1", feat_name="prop1" - ) - - F.add_data(torch.tensor([5, 4, 3, 2, 1]), type_name="vt1", feat_name="prop2") - - return F, G, N - - -@pytest.fixture -def multi_edge_multi_vertex_graph_1(): - - G = { - ("brown", "horse", "brown"): [ - torch.tensor([0, 0]), - torch.tensor([1, 2]), - ], - ("brown", "tortoise", "black"): [ - torch.tensor([1, 1, 2]), - torch.tensor([1, 0, 1]), - ], - ("brown", "mongoose", "black"): [ - torch.tensor([2, 1]), - torch.tensor([0, 1]), - ], - ("black", "cow", "brown"): [ - torch.tensor([0, 0]), - torch.tensor([1, 2]), - ], - ("black", "snake", "black"): [ - torch.tensor([1]), - torch.tensor([0]), - ], - } - - N = {"brown": 3, "black": 2} - - F = FeatureStore() - F.add_data(torch.tensor([100, 200, 300]), type_name="brown", feat_name="prop1") - - F.add_data(torch.tensor([400, 500]), type_name="black", 
feat_name="prop1") - - F.add_data(torch.tensor([5, 4, 3]), type_name="brown", feat_name="prop2") - - F.add_data(torch.tensor([2, 1]), type_name="black", feat_name="prop2") - - return F, G, N - - -@pytest.fixture -def multi_edge_multi_vertex_no_graph_1(): - G = { - ("brown", "horse", "brown"): 2, - ("brown", "tortoise", "black"): 3, - ("brown", "mongoose", "black"): 3, - ("black", "cow", "brown"): 3, - ("black", "snake", "black"): 1, - } - - N = {"brown": 3, "black": 2} - - F = FeatureStore() - F.add_data(np.array([100, 200, 300]), type_name="brown", feat_name="prop1") - - F.add_data(np.array([400, 500]), type_name="black", feat_name="prop1") - - F.add_data(np.array([5, 4, 3]), type_name="brown", feat_name="prop2") - - F.add_data(np.array([2, 1]), type_name="black", feat_name="prop2") - - return F, G, N - - -@pytest.fixture -def abc_graph(): - N = { - "A": 2, # 0, 1 - "B": 3, # 2, 3, 4 - "C": 4, # 5, 6, 7, 8 - } - - G = { - # (0->2, 0->3, 1->3) - ("A", "ab", "B"): [ - torch.tensor([0, 0, 1], dtype=torch.int64), - torch.tensor([0, 1, 1], dtype=torch.int64), - ], - # (2->0, 2->1, 3->1, 4->0) - ("B", "ba", "A"): [ - torch.tensor([0, 0, 1, 2], dtype=torch.int64), - torch.tensor([0, 1, 1, 0], dtype=torch.int64), - ], - # (2->6, 2->8, 3->5, 3->7, 4->5, 4->8) - ("B", "bc", "C"): [ - torch.tensor([0, 0, 1, 1, 2, 2], dtype=torch.int64), - torch.tensor([1, 3, 0, 2, 0, 3], dtype=torch.int64), - ], - } - - F = FeatureStore() - F.add_data( - torch.tensor([3.2, 2.1], dtype=torch.float32), type_name="A", feat_name="prop1" - ) - - return F, G, N - - -@pytest.fixture -def basic_pyg_graph_1(): - edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]]) - size = (4, 4) - return edge_index, size - - -@pytest.fixture -def basic_pyg_graph_2(): - edge_index = torch.tensor( - [ - [0, 1, 0, 2, 3, 0, 4, 0, 5, 0, 6, 7, 0, 8, 9], - [1, 9, 2, 9, 9, 4, 9, 5, 9, 6, 9, 9, 8, 9, 0], - ] - ) - size = (10, 10) - return edge_index, size - - -@pytest.fixture -def sample_pyg_hetero_data(): - 
torch.manual_seed(12345) - raw_data_dict = { - "v0": torch.randn(6, 3), - "v1": torch.randn(7, 2), - "v2": torch.randn(5, 4), - ("v2", "e0", "v1"): torch.tensor([[0, 2, 2, 4, 4], [4, 3, 6, 0, 1]]), - ("v1", "e1", "v1"): torch.tensor( - [[0, 2, 2, 2, 3, 5, 5], [4, 0, 4, 5, 3, 0, 1]] - ), - ("v0", "e2", "v0"): torch.tensor([[0, 2, 2, 3, 5, 5], [1, 1, 5, 1, 1, 2]]), - ("v1", "e3", "v2"): torch.tensor( - [[0, 1, 1, 2, 4, 5, 6], [1, 2, 3, 1, 2, 2, 2]] - ), - ("v0", "e4", "v2"): torch.tensor([[1, 1, 3, 3, 4, 4], [1, 4, 1, 4, 0, 3]]), - } - - # create a nested dictionary to facilitate PyG's HeteroData construction - hetero_data_dict = {} - for key, value in raw_data_dict.items(): - if isinstance(key, tuple): - hetero_data_dict[key] = {"edge_index": value} - else: - hetero_data_dict[key] = {"x": value} - - return hetero_data_dict diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_dask_graph_store.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_dask_graph_store.py deleted file mode 100644 index 0a997a960b8..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/data/test_dask_graph_store.py +++ /dev/null @@ -1,413 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import cugraph -from cugraph_pyg.data.dask_graph_store import ( - CuGraphTensorAttr, - CuGraphEdgeAttr, - EdgeLayout, -) -from cugraph_pyg.data import DaskGraphStore - -import cudf -import cupy -import numpy as np - -from cugraph.utilities.utils import import_optional, MissingModule - -import pytest - - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_tensor_attr(): - ta = CuGraphTensorAttr("group0", "property1") - assert not ta.is_fully_specified() - assert not ta.is_set("index") - - ta.fully_specify() - assert ta.is_fully_specified() - - other_ta = CuGraphTensorAttr(index=[1, 2, 3]) - ta.update(other_ta) - assert ta.index == [1, 2, 3] - - casted_ta1 = CuGraphTensorAttr.cast(ta) - assert casted_ta1 == ta - - casted_ta2 = CuGraphTensorAttr.cast(index=[1, 2, 3]) - assert casted_ta2.index == [1, 2, 3] - assert not casted_ta2.is_fully_specified() - - casted_ta3 = CuGraphTensorAttr.cast( - "group2", - "property2", - [1, 2, 3], - ) - assert casted_ta3.group_name == "group2" - assert casted_ta3.attr_name == "property2" - assert casted_ta3.index == [1, 2, 3] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_edge_attr(): - ea = CuGraphEdgeAttr("type0", EdgeLayout.COO, False, 10) - assert ea.edge_type == "type0" - assert ea.layout == EdgeLayout.COO - assert not ea.is_sorted - assert ea.size == 10 - - ea = CuGraphEdgeAttr(edge_type="type1", layout="csr", is_sorted=True) - assert ea.size is None - - ea = CuGraphEdgeAttr.cast("type0", EdgeLayout.COO, False, 10) - assert ea.edge_type == "type0" - assert ea.layout == EdgeLayout.COO - assert not ea.is_sorted - assert ea.size == 10 - - -@pytest.fixture( - params=[ - "basic_graph_1", - "multi_edge_graph_1", - "multi_edge_multi_vertex_graph_1", - ] -) -def graph(request): - return 
request.getfixturevalue(request.param) - - -@pytest.fixture(params=["basic_graph_1", "multi_edge_graph_1"]) -def single_vertex_graph(request): - return request.getfixturevalue(request.param) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.parametrize("edge_index_type", ["numpy", "torch-cpu", "torch-gpu", "cudf"]) -@pytest.mark.sg -def test_get_edge_index(graph, edge_index_type): - F, G, N = graph - if "torch" in edge_index_type: - if edge_index_type == "torch-cpu": - device = "cpu" - else: - device = "cuda" - for et in list(G.keys()): - G[et][0] = torch.as_tensor(G[et][0], device=device) - G[et][1] = torch.as_tensor(G[et][1], device=device) - elif edge_index_type == "cudf": - for et in list(G.keys()): - G[et][0] = cudf.Series(G[et][0]) - G[et][1] = cudf.Series(G[et][1]) - - cugraph_store = DaskGraphStore(F, G, N, order="CSC") - - for pyg_can_edge_type in G: - src, dst = cugraph_store.get_edge_index( - edge_type=pyg_can_edge_type, layout="coo", is_sorted=False - ) - - if edge_index_type == "cudf": - assert G[pyg_can_edge_type][0].values_host.tolist() == src.tolist() - assert G[pyg_can_edge_type][1].values_host.tolist() == dst.tolist() - else: - assert G[pyg_can_edge_type][0].tolist() == src.tolist() - assert G[pyg_can_edge_type][1].tolist() == dst.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_edge_types(graph): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N) - - eta = cugraph_store._edge_types_to_attrs - assert eta.keys() == G.keys() - - for attr_name, attr_repr in eta.items(): - src_size = N[attr_name[0]] - dst_size = N[attr_name[-1]] - assert src_size == attr_repr.size[0] - assert dst_size == attr_repr.size[-1] - assert attr_name == attr_repr.edge_type - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_get_subgraph(graph): - F, G, N = graph - cugraph_store 
= DaskGraphStore(F, G, N) - - if len(G.keys()) > 1: - for edge_type in G.keys(): - # Subgraphing is not implemented yet and should raise an error - with pytest.raises(ValueError): - sg = cugraph_store._subgraph([edge_type]) - - sg = cugraph_store._subgraph(list(G.keys())) - assert isinstance(sg, cugraph.MultiGraph) - - num_edges = sum([len(v[0]) for v in G.values()]) - assert sg.number_of_edges() == num_edges - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_renumber_vertices_basic(single_vertex_graph): - F, G, N = single_vertex_graph - cugraph_store = DaskGraphStore(F, G, N) - - nodes_of_interest = torch.as_tensor( - cupy.random.randint(0, sum(N.values()), 3), device="cuda" - ) - - index = cugraph_store._get_vertex_groups_from_sample(nodes_of_interest) - assert index["vt1"].tolist() == nodes_of_interest.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_renumber_vertices_multi_edge_multi_vertex(multi_edge_multi_vertex_graph_1): - F, G, N = multi_edge_multi_vertex_graph_1 - cugraph_store = DaskGraphStore(F, G, N) - - nodes_of_interest = torch.as_tensor( - cupy.random.randint(0, sum(N.values()), 3), device="cuda" - ).unique() - - index = cugraph_store._get_vertex_groups_from_sample(nodes_of_interest) - - black_nodes = nodes_of_interest[nodes_of_interest <= 1] - brown_nodes = nodes_of_interest[nodes_of_interest > 1] - 2 - - if len(black_nodes) > 0: - assert index["black"].tolist() == sorted(black_nodes.tolist()) - if len(brown_nodes) > 0: - assert index["brown"].tolist() == sorted(brown_nodes.tolist()) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_renumber_edges(abc_graph): - F, G, N = abc_graph - - graph_store = DaskGraphStore(F, G, N, order="CSR") - - # let 0, 1 be the start vertices, fanout = [2, 1, 2, 3] - mock_sampling_results = cudf.DataFrame( - { - 
"majors": cudf.Series([0, 0, 1, 2, 3, 3, 1, 3, 3, 3], dtype="int64"), - "minors": cudf.Series([2, 3, 3, 8, 1, 7, 3, 1, 5, 7], dtype="int64"), - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 3, 3, 3], dtype="int32"), - "edge_type": cudf.Series([0, 0, 0, 2, 1, 2, 0, 1, 2, 2], dtype="int32"), - } - ) - - mock_noi_index = { - "A": torch.tensor([0, 1], device="cuda"), - "B": torch.tensor([0, 1], device="cuda"), - "C": torch.tensor([3, 2, 0], device="cuda"), - } - - row_dict, col_dict = graph_store._get_renumbered_edge_groups_from_sample( - mock_sampling_results, mock_noi_index - ) - - assert len(row_dict) == 3 - assert len(col_dict) == 3 - assert row_dict[("A", "ab", "B")].tolist() == [0, 0, 1, 1] - assert col_dict[("A", "ab", "B")].tolist() == [0, 1, 1, 1] - assert row_dict[("B", "bc", "C")].tolist() == [0, 1, 1, 1] - assert col_dict[("B", "bc", "C")].tolist() == [0, 1, 2, 1] - assert row_dict[("B", "ba", "A")].tolist() == [1, 1] - assert col_dict[("B", "ba", "A")].tolist() == [1, 1] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_get_tensor(graph): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N) - - for feature_name, feature_on_types in F.get_feature_list().items(): - for type_name in feature_on_types: - v_ids = np.arange(N[type_name]) - base_series = F.get_data( - v_ids, - type_name=type_name, - feat_name=feature_name, - ).tolist() - - tsr = cugraph_store.get_tensor( - type_name, feature_name, v_ids, None, cupy.int64 - ).tolist() - - assert tsr == base_series - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_get_tensor_empty_idx(karate_gnn): - F, G, N = karate_gnn - cugraph_store = DaskGraphStore(F, G, N) - - t = cugraph_store.get_tensor( - CuGraphTensorAttr(group_name="type0", attr_name="prop0", index=None) - ) - assert t.tolist() == (torch.arange(17, dtype=torch.float32) * 31).tolist() - - 
-@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_multi_get_tensor(graph): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N) - - for vertex_type in sorted(N.keys()): - v_ids = np.arange(N[vertex_type]) - feat_names = list(F.get_feature_list().keys()) - base_series = None - for feat_name in feat_names: - if base_series is None: - base_series = F.get_data(v_ids, vertex_type, feat_name) - else: - base_series = np.stack( - [base_series, F.get_data(v_ids, vertex_type, feat_name)] - ) - - tsr = cugraph_store.multi_get_tensor( - [ - CuGraphTensorAttr(vertex_type, feat_name, v_ids) - for feat_name in feat_names - ] - ) - - assert torch.stack(tsr).tolist() == base_series.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_get_all_tensor_attrs(graph): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N) - - tensor_attrs = [] - for vertex_type in sorted(N.keys()): - for prop in ["prop1", "prop2"]: - tensor_attrs.append( - CuGraphTensorAttr( - vertex_type, - prop, - properties=None, - dtype=F.get_data([0], vertex_type, "prop1").dtype, - ) - ) - - for t in tensor_attrs: - print(t) - - print("\n\n") - - for t in cugraph_store.get_all_tensor_attrs(): - print(t) - - assert sorted(tensor_attrs, key=lambda a: (a.group_name, a.attr_name)) == sorted( - cugraph_store.get_all_tensor_attrs(), key=lambda a: (a.group_name, a.attr_name) - ) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_get_tensor_from_tensor_attrs(graph): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N) - - tensor_attrs = cugraph_store.get_all_tensor_attrs() - for tensor_attr in tensor_attrs: - v_ids = np.arange(N[tensor_attr.group_name]) - data = F.get_data(v_ids, tensor_attr.group_name, tensor_attr.attr_name) - - tensor_attr.index = v_ids - assert cugraph_store.get_tensor(tensor_attr).tolist() 
== data.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_get_tensor_size(graph): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N) - - tensor_attrs = cugraph_store.get_all_tensor_attrs() - for tensor_attr in tensor_attrs: - sz = N[tensor_attr.group_name] - - tensor_attr.index = np.arange(sz) - assert cugraph_store.get_tensor_size(tensor_attr) == torch.Size((sz,)) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif( - isinstance(torch_geometric, MissingModule), reason="pyg not available" -) -@pytest.mark.sg -def test_get_input_nodes(karate_gnn): - F, G, N = karate_gnn - cugraph_store = DaskGraphStore(F, G, N) - - input_node_info = torch_geometric.loader.utils.get_input_nodes( - (cugraph_store, cugraph_store), "type0" - ) - - # PyG 2.4 - if len(input_node_info) == 2: - node_type, input_nodes = input_node_info - # PyG 2.5 - elif len(input_node_info) == 3: - node_type, input_nodes, input_id = input_node_info - # Invalid - else: - raise ValueError("Invalid output from get_input_nodes") - - assert node_type == "type0" - assert input_nodes.tolist() == torch.arange(17, dtype=torch.int32).tolist() - - -@pytest.mark.sg -def test_serialize(multi_edge_multi_vertex_no_graph_1): - import pickle - - F, G, N = multi_edge_multi_vertex_no_graph_1 - cugraph_store = DaskGraphStore(F, G, N) - - cugraph_store_copy = pickle.loads(pickle.dumps(cugraph_store)) - - for tensor_attr in cugraph_store.get_all_tensor_attrs(): - sz = cugraph_store.get_tensor_size(tensor_attr)[0] - tensor_attr.index = np.arange(sz) - assert ( - cugraph_store.get_tensor(tensor_attr).tolist() - == cugraph_store_copy.get_tensor(tensor_attr).tolist() - ) - - # Currently does not store edgelist properly for SG - """ - for edge_attr in cugraph_store.get_all_edge_attrs(): - assert cugraph_store.get_edge_index(edge_attr) \ - == cugraph_store_copy.get_edge_index(edge_attr) - """ 
diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_dask_graph_store_mg.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_dask_graph_store_mg.py deleted file mode 100644 index 65cb8984586..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/data/test_dask_graph_store_mg.py +++ /dev/null @@ -1,424 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import cugraph -from cugraph_pyg.data.dask_graph_store import ( - CuGraphTensorAttr, - CuGraphEdgeAttr, - EdgeLayout, -) -from cugraph_pyg.data import DaskGraphStore - -import cudf -import dask_cudf -import cupy -import numpy as np - -from cugraph.utilities.utils import import_optional, MissingModule - -import pytest - - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_tensor_attr(): - ta = CuGraphTensorAttr("group0", "property1") - assert not ta.is_fully_specified() - assert not ta.is_set("index") - - ta.fully_specify() - assert ta.is_fully_specified() - - other_ta = CuGraphTensorAttr(index=[1, 2, 3]) - ta.update(other_ta) - assert ta.index == [1, 2, 3] - - casted_ta1 = CuGraphTensorAttr.cast(ta) - assert casted_ta1 == ta - - casted_ta2 = CuGraphTensorAttr.cast(index=[1, 2, 3]) - assert casted_ta2.index == [1, 2, 3] - assert not casted_ta2.is_fully_specified() - - casted_ta3 = CuGraphTensorAttr.cast( - "group2", - 
"property2", - [1, 2, 3], - ) - assert casted_ta3.group_name == "group2" - assert casted_ta3.attr_name == "property2" - assert casted_ta3.index == [1, 2, 3] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_edge_attr(): - ea = CuGraphEdgeAttr("type0", EdgeLayout.COO, False, 10) - assert ea.edge_type == "type0" - assert ea.layout == EdgeLayout.COO - assert not ea.is_sorted - assert ea.size == 10 - - ea = CuGraphEdgeAttr(edge_type="type1", layout="csr", is_sorted=True) - assert ea.size is None - - ea = CuGraphEdgeAttr.cast("type0", EdgeLayout.COO, False, 10) - assert ea.edge_type == "type0" - assert ea.layout == EdgeLayout.COO - assert not ea.is_sorted - assert ea.size == 10 - - -@pytest.fixture( - params=[ - "basic_graph_1", - "multi_edge_graph_1", - "multi_edge_multi_vertex_graph_1", - ] -) -def graph(request): - return request.getfixturevalue(request.param) - - -@pytest.fixture(params=["basic_graph_1", "multi_edge_graph_1"]) -def single_vertex_graph(request): - return request.getfixturevalue(request.param) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.parametrize( - "edge_index_type", ["numpy", "torch-cpu", "torch-gpu", "cudf", "dask-cudf"] -) -@pytest.mark.mg -def test_get_edge_index(graph, edge_index_type, dask_client): - F, G, N = graph - if "torch" in edge_index_type: - if edge_index_type == "torch-cpu": - device = "cpu" - else: - device = "cuda" - for et in list(G.keys()): - G[et][0] = torch.as_tensor(G[et][0], device=device) - G[et][1] = torch.as_tensor(G[et][1], device=device) - elif edge_index_type == "cudf": - for et in list(G.keys()): - G[et][0] = cudf.Series(G[et][0]) - G[et][1] = cudf.Series(G[et][1]) - elif edge_index_type == "dask-cudf": - for et in list(G.keys()): - G[et][0] = dask_cudf.from_cudf(cudf.Series(G[et][0]), npartitions=1) - G[et][1] = dask_cudf.from_cudf(cudf.Series(G[et][1]), npartitions=1) - - cugraph_store = DaskGraphStore(F, G, 
N, order="CSC", multi_gpu=True) - - for pyg_can_edge_type in G: - src, dst = cugraph_store.get_edge_index( - edge_type=pyg_can_edge_type, layout="coo", is_sorted=False - ) - - if edge_index_type == "cudf": - assert G[pyg_can_edge_type][0].values_host.tolist() == src.tolist() - assert G[pyg_can_edge_type][1].values_host.tolist() == dst.tolist() - elif edge_index_type == "dask-cudf": - assert ( - G[pyg_can_edge_type][0].compute().values_host.tolist() == src.tolist() - ) - assert ( - G[pyg_can_edge_type][1].compute().values_host.tolist() == dst.tolist() - ) - else: - assert G[pyg_can_edge_type][0].tolist() == src.tolist() - assert G[pyg_can_edge_type][1].tolist() == dst.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_edge_types(graph, dask_client): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True) - - eta = cugraph_store._edge_types_to_attrs - assert eta.keys() == G.keys() - - for attr_name, attr_repr in eta.items(): - src_size = N[attr_name[0]] - dst_size = N[attr_name[-1]] - assert src_size == attr_repr.size[0] - assert dst_size == attr_repr.size[-1] - assert attr_name == attr_repr.edge_type - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_get_subgraph(graph, dask_client): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True) - - if len(G.keys()) > 1: - for edge_type in G.keys(): - # Subgraphing is not implemented yet and should raise an error - with pytest.raises(ValueError): - sg = cugraph_store._subgraph([edge_type]) - - sg = cugraph_store._subgraph(list(G.keys())) - assert isinstance(sg, cugraph.MultiGraph) - - num_edges = sum([len(v[0]) for v in G.values()]) - assert sg.number_of_edges() == num_edges - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_renumber_vertices_basic(single_vertex_graph, dask_client): 
- F, G, N = single_vertex_graph - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True) - - nodes_of_interest = torch.as_tensor( - cupy.random.randint(0, sum(N.values()), 3), device="cuda" - ) - - index = cugraph_store._get_vertex_groups_from_sample(nodes_of_interest) - assert index["vt1"].tolist() == nodes_of_interest.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_renumber_vertices_multi_edge_multi_vertex( - multi_edge_multi_vertex_graph_1, dask_client -): - F, G, N = multi_edge_multi_vertex_graph_1 - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True) - - nodes_of_interest = torch.as_tensor( - cupy.random.randint(0, sum(N.values()), 3), device="cuda" - ).unique() - - index = cugraph_store._get_vertex_groups_from_sample(nodes_of_interest) - - black_nodes = nodes_of_interest[nodes_of_interest <= 1] - brown_nodes = nodes_of_interest[nodes_of_interest > 1] - 2 - - if len(black_nodes) > 0: - assert index["black"].tolist() == sorted(black_nodes.tolist()) - if len(brown_nodes) > 0: - assert index["brown"].tolist() == sorted(brown_nodes.tolist()) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_renumber_edges(abc_graph, dask_client): - F, G, N = abc_graph - - graph_store = DaskGraphStore(F, G, N, multi_gpu=True, order="CSR") - - # let 0, 1 be the start vertices, fanout = [2, 1, 2, 3] - mock_sampling_results = cudf.DataFrame( - { - "majors": cudf.Series([0, 0, 1, 2, 3, 3, 1, 3, 3, 3], dtype="int64"), - "minors": cudf.Series([2, 3, 3, 8, 1, 7, 3, 1, 5, 7], dtype="int64"), - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 3, 3, 3], dtype="int32"), - "edge_type": cudf.Series([0, 0, 0, 2, 1, 2, 0, 1, 2, 2], dtype="int32"), - } - ) - - mock_noi_index = { - "A": torch.tensor([0, 1], device="cuda"), - "B": torch.tensor([0, 1], device="cuda"), - "C": torch.tensor([3, 2, 0], device="cuda"), - } - - row_dict, col_dict = 
graph_store._get_renumbered_edge_groups_from_sample( - mock_sampling_results, mock_noi_index - ) - - assert len(row_dict) == 3 - assert len(col_dict) == 3 - assert row_dict[("A", "ab", "B")].tolist() == [0, 0, 1, 1] - assert col_dict[("A", "ab", "B")].tolist() == [0, 1, 1, 1] - assert row_dict[("B", "bc", "C")].tolist() == [0, 1, 1, 1] - assert col_dict[("B", "bc", "C")].tolist() == [0, 1, 2, 1] - assert row_dict[("B", "ba", "A")].tolist() == [1, 1] - assert col_dict[("B", "ba", "A")].tolist() == [1, 1] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_get_tensor(graph, dask_client): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True) - - for feature_name, feature_on_types in F.get_feature_list().items(): - for type_name in feature_on_types: - v_ids = np.arange(N[type_name]) - base_series = F.get_data( - v_ids, - type_name=type_name, - feat_name=feature_name, - ).tolist() - - tsr = cugraph_store.get_tensor( - type_name, feature_name, v_ids, None, cupy.int64 - ).tolist() - - assert tsr == base_series - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_get_tensor_empty_idx(karate_gnn, dask_client): - F, G, N = karate_gnn - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True) - - t = cugraph_store.get_tensor( - CuGraphTensorAttr(group_name="type0", attr_name="prop0", index=None) - ) - assert t.tolist() == (torch.arange(17, dtype=torch.float32) * 31).tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_multi_get_tensor(graph, dask_client): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True) - - for vertex_type in sorted(N.keys()): - v_ids = np.arange(N[vertex_type]) - feat_names = list(F.get_feature_list().keys()) - base_series = None - for feat_name in feat_names: - if base_series is None: - base_series = 
F.get_data(v_ids, vertex_type, feat_name) - else: - base_series = np.stack( - [base_series, F.get_data(v_ids, vertex_type, feat_name)] - ) - - tsr = cugraph_store.multi_get_tensor( - [ - CuGraphTensorAttr(vertex_type, feat_name, v_ids) - for feat_name in feat_names - ] - ) - - assert torch.stack(tsr).tolist() == base_series.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_get_all_tensor_attrs(graph, dask_client): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True) - - tensor_attrs = [] - for vertex_type in sorted(N.keys()): - for prop in ["prop1", "prop2"]: - tensor_attrs.append( - CuGraphTensorAttr( - vertex_type, - prop, - properties=None, - dtype=F.get_data([0], vertex_type, "prop1").dtype, - ) - ) - - assert sorted(tensor_attrs, key=lambda a: (a.group_name, a.attr_name)) == sorted( - cugraph_store.get_all_tensor_attrs(), key=lambda a: (a.group_name, a.attr_name) - ) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_get_tensor_from_tensor_attrs(graph, dask_client): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True) - - tensor_attrs = cugraph_store.get_all_tensor_attrs() - for tensor_attr in tensor_attrs: - v_ids = np.arange(N[tensor_attr.group_name]) - data = F.get_data(v_ids, tensor_attr.group_name, tensor_attr.attr_name) - - tensor_attr.index = v_ids - assert cugraph_store.get_tensor(tensor_attr).tolist() == data.tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_get_tensor_size(graph, dask_client): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True) - - tensor_attrs = cugraph_store.get_all_tensor_attrs() - for tensor_attr in tensor_attrs: - sz = N[tensor_attr.group_name] - - tensor_attr.index = np.arange(sz) - assert cugraph_store.get_tensor_size(tensor_attr) == 
torch.Size((sz,)) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif( - isinstance(torch_geometric, MissingModule), reason="pyg not available" -) -@pytest.mark.mg -def test_get_input_nodes(karate_gnn, dask_client): - F, G, N = karate_gnn - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True) - - nodes = torch_geometric.loader.utils.get_input_nodes( - (cugraph_store, cugraph_store), "type0" - ) - - if len(nodes) == 2: - node_type, input_nodes = nodes - else: - node_type, input_nodes, _ = nodes - - assert node_type == "type0" - assert input_nodes.tolist() == torch.arange(17, dtype=torch.int32).tolist() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_mg_frame_handle(graph, dask_client): - F, G, N = graph - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True) - assert isinstance(cugraph_store._DaskGraphStore__graph._plc_graph, dict) - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_cugraph_loader_large_index(dask_client): - large_index = ( - np.random.randint(0, 1_000_000, (100_000_000,)), - np.random.randint(0, 1_000_000, (100_000_000,)), - ) - - large_features = np.random.randint(0, 50, (1_000_000,)) - F = cugraph.gnn.FeatureStore(backend="torch") - F.add_data(large_features, "N", "f") - - store = DaskGraphStore( - F, - {("N", "e", "N"): large_index}, - {"N": 1_000_000}, - multi_gpu=True, - ) - - graph = store._subgraph() - assert isinstance(graph, cugraph.Graph) - - el = graph.view_edge_list().compute() - assert (el["src"].values_host - large_index[0]).sum() == 0 - assert (el["dst"].values_host - large_index[1]).sum() == 0 diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py deleted file mode 100644 index ab5f1e217bb..00000000000 --- 
a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from cugraph.utilities.utils import import_optional, MissingModule - -from cugraph_pyg.data import TensorDictFeatureStore - -torch = import_optional("torch") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_tensordict_feature_store_basic_api(): - feature_store = TensorDictFeatureStore() - - node_features_0 = torch.randint(128, (100, 1000)) - node_features_1 = torch.randint(256, (100, 10)) - - other_features = torch.randint(1024, (10, 5)) - - feature_store["node", "feat0"] = node_features_0 - feature_store["node", "feat1"] = node_features_1 - feature_store["other", "feat"] = other_features - - assert (feature_store["node"]["feat0"][:] == node_features_0).all() - assert (feature_store["node"]["feat1"][:] == node_features_1).all() - assert (feature_store["other"]["feat"][:] == other_features).all() - - assert len(feature_store.get_all_tensor_attrs()) == 3 - - del feature_store["node", "feat0"] - assert len(feature_store.get_all_tensor_attrs()) == 2 diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store_mg.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store_mg.py deleted file mode 100644 index f1f514560c8..00000000000 --- 
a/python/cugraph-pyg/cugraph_pyg/tests/data/test_feature_store_mg.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import pytest - -from cugraph.utilities.utils import import_optional, MissingModule - -from cugraph_pyg.data import TensorDictFeatureStore, WholeFeatureStore - -torch = import_optional("torch") -pylibwholegraph = import_optional("pylibwholegraph") - - -def run_test_wholegraph_feature_store_basic_api(rank, world_size, dtype): - if dtype == "float32": - torch_dtype = torch.float32 - elif dtype == "int64": - torch_dtype = torch.int64 - - torch.cuda.set_device(rank) - - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "12355" - torch.distributed.init_process_group("nccl", rank=rank, world_size=world_size) - - pylibwholegraph.torch.initialize.init( - rank, - world_size, - rank, - world_size, - ) - - features = torch.arange(0, world_size * 2000) - features = features.reshape((features.numel() // 100, 100)).to(torch_dtype) - - tensordict_store = TensorDictFeatureStore() - tensordict_store["node", "fea"] = features - - whole_store = WholeFeatureStore() - whole_store["node", "fea"] = torch.tensor_split(features, world_size)[rank] - - ix = torch.arange(features.shape[0]) - assert ( - whole_store["node", "fea"][ix].cpu() == tensordict_store["node", "fea"][ix] - ).all() - - label = torch.arange(0, features.shape[0]).reshape((features.shape[0], 1)) - 
tensordict_store["node", "label"] = label - whole_store["node", "label"] = torch.tensor_split(label, world_size)[rank] - - assert ( - whole_store["node", "fea"][ix].cpu() == tensordict_store["node", "fea"][ix] - ).all() - - pylibwholegraph.torch.initialize.finalize() - - -@pytest.mark.skipif( - isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available" -) -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.parametrize("dtype", ["float32", "int64"]) -@pytest.mark.mg -def test_wholegraph_feature_store_basic_api(dtype): - world_size = torch.cuda.device_count() - torch.multiprocessing.spawn( - run_test_wholegraph_feature_store_basic_api, - args=( - world_size, - dtype, - ), - nprocs=world_size, - ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store.py deleted file mode 100644 index a8b93665aad..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from cugraph.datasets import karate -from cugraph.utilities.utils import import_optional, MissingModule - -from cugraph_pyg.data import GraphStore - -torch = import_optional("torch") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_graph_store_basic_api(): - df = karate.get_edgelist() - src = torch.as_tensor(df["src"], device="cuda") - dst = torch.as_tensor(df["dst"], device="cuda") - - ei = torch.stack([dst, src]) - - graph_store = GraphStore() - graph_store.put_edge_index(ei, ("person", "knows", "person"), "coo") - - rei = graph_store.get_edge_index(("person", "knows", "person"), "coo") - - assert (ei == rei).all() - - edge_attrs = graph_store.get_all_edge_attrs() - assert len(edge_attrs) == 1 - - graph_store.remove_edge_index(("person", "knows", "person"), "coo") - edge_attrs = graph_store.get_all_edge_attrs() - assert len(edge_attrs) == 0 diff --git a/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store_mg.py b/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store_mg.py deleted file mode 100644 index 14540b7e17d..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/data/test_graph_store_mg.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from cugraph.datasets import karate -from cugraph.utilities.utils import import_optional, MissingModule - -from cugraph_pyg.data import GraphStore - -torch = import_optional("torch") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_graph_store_basic_api_mg(): - df = karate.get_edgelist() - src = torch.as_tensor(df["src"], device="cuda") - dst = torch.as_tensor(df["dst"], device="cuda") - - ei = torch.stack([dst, src]) - - graph_store = GraphStore(is_multi_gpu=True) - graph_store.put_edge_index(ei, ("person", "knows", "person"), "coo") - - rei = graph_store.get_edge_index(("person", "knows", "person"), "coo") - - assert (ei == rei).all() - - edge_attrs = graph_store.get_all_edge_attrs() - assert len(edge_attrs) == 1 - - graph_store.remove_edge_index(("person", "knows", "person"), "coo") - edge_attrs = graph_store.get_all_edge_attrs() - assert len(edge_attrs) == 0 diff --git a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_dask_neighbor_loader.py b/python/cugraph-pyg/cugraph_pyg/tests/loader/test_dask_neighbor_loader.py deleted file mode 100644 index 34ef6a59511..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_dask_neighbor_loader.py +++ /dev/null @@ -1,543 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -import tempfile -import os - -import cudf -import cupy -import numpy as np - -from cugraph_pyg.loader import DaskNeighborLoader -from cugraph_pyg.loader import BulkSampleLoader -from cugraph_pyg.data import DaskGraphStore -from cugraph_pyg.nn import SAGEConv as CuGraphSAGEConv - -from cugraph.gnn import FeatureStore -from cugraph.utilities.utils import import_optional, MissingModule - -from typing import Dict, Tuple - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - -trim_to_layer = import_optional("torch_geometric.utils.trim_to_layer") -if isinstance(trim_to_layer, MissingModule): - trim_to_layer = import_optional("torch_geometric.utils._trim_to_layer") - - -try: - import torch_sparse # noqa: F401 - - HAS_TORCH_SPARSE = True -except: # noqa: E722 - HAS_TORCH_SPARSE = False - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_cugraph_loader_basic( - karate_gnn: Tuple[ - FeatureStore, Dict[Tuple[str, str, str], np.ndarray], Dict[str, int] - ] -): - F, G, N = karate_gnn - cugraph_store = DaskGraphStore(F, G, N, order="CSR") - loader = DaskNeighborLoader( - (cugraph_store, cugraph_store), - torch.arange(N["type0"] + N["type1"], dtype=torch.int64), - 10, - num_neighbors=[4, 4], - random_state=62, - replace=False, - ) - - samples = [s for s in loader] - - assert len(samples) == 3 - for sample in samples: - if "type0" in sample: - for prop in sample["type0"]["prop0"].tolist(): - assert prop % 31 == 0 - - if "type1" in sample: - for prop in sample["type1"]["prop0"].tolist(): - assert prop % 41 == 0 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_cugraph_loader_hetero( - karate_gnn: Tuple[ - FeatureStore, Dict[Tuple[str, str, str], np.ndarray], Dict[str, int] - ] -): - F, G, N = karate_gnn - cugraph_store = DaskGraphStore(F, G, N, order="CSR") - loader = DaskNeighborLoader( - 
(cugraph_store, cugraph_store), - input_nodes=("type1", torch.tensor([0, 1, 2, 5], device="cuda")), - batch_size=2, - num_neighbors=[4, 4], - random_state=62, - replace=False, - ) - - samples = [s for s in loader] - - assert len(samples) == 2 - for sample in samples: - if "type0" in sample: - for prop in sample["type0"]["prop0"].tolist(): - assert prop % 31 == 0 - - if "type1" in sample: - for prop in sample["type1"]["prop0"].tolist(): - assert prop % 41 == 0 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_cugraph_loader_from_disk(): - m = [2, 9, 99, 82, 9, 3, 18, 1, 12] - n = torch.arange(1, 1 + len(m), dtype=torch.int32) - x = torch.zeros(256, dtype=torch.int32) - x[torch.tensor(m, dtype=torch.int32)] = n - F = FeatureStore() - F.add_data(x, "t0", "x") - - G = {("t0", "knows", "t0"): 9080} - N = {"t0": 256} - - cugraph_store = DaskGraphStore(F, G, N, order="CSR") - - bogus_samples = cudf.DataFrame( - { - "majors": [0, 1, 2, 3, 4, 5, 6, 6], - "minors": [5, 4, 3, 2, 2, 6, 5, 2], - "edge_type": cudf.Series([0, 0, 0, 0, 0, 0, 0, 0], dtype="int32"), - "edge_id": [5, 10, 15, 20, 25, 30, 35, 40], - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 2], dtype="int32"), - } - ) - map = cudf.Series(m, name="map") - bogus_samples = bogus_samples.join(map, how="outer").sort_index() - - tempdir = tempfile.TemporaryDirectory() - for s in range(256): - bogus_samples["batch_id"] = cupy.int32(s) - bogus_samples.to_parquet(os.path.join(tempdir.name, f"batch={s}-{s}.parquet")) - - loader = BulkSampleLoader( - feature_store=cugraph_store, - graph_store=cugraph_store, - directory=tempdir, - ) - - num_samples = 0 - for sample in loader: - num_samples += 1 - assert sample["t0"]["num_nodes"] == 7 - # correct vertex order is [0, 1, 2, 5, 4, 3, 6]; x = [1, 2, 3, 6, 5, 4, 7] - assert sample["t0"]["x"].tolist() == [3, 4, 5, 6, 7, 8, 9] - - edge_index = sample[("t0", "knows", "t0")]["edge_index"] - assert list(edge_index.shape) == 
[2, 8] - - assert ( - edge_index[0].tolist() == bogus_samples.majors.dropna().values_host.tolist() - ) - assert ( - edge_index[1].tolist() == bogus_samples.minors.dropna().values_host.tolist() - ) - - assert num_samples == 256 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_cugraph_loader_from_disk_subset(): - m = [2, 9, 99, 82, 9, 3, 18, 1, 12] - n = torch.arange(1, 1 + len(m), dtype=torch.int32) - x = torch.zeros(256, dtype=torch.int32) - x[torch.tensor(m, dtype=torch.int32)] = n - F = FeatureStore() - F.add_data(x, "t0", "x") - - G = {("t0", "knows", "t0"): 9080} - N = {"t0": 256} - - cugraph_store = DaskGraphStore(F, G, N, order="CSR") - - bogus_samples = cudf.DataFrame( - { - "majors": [0, 1, 2, 3, 4, 5, 6, 6], - "minors": [5, 4, 3, 2, 2, 6, 5, 2], - "edge_type": cudf.Series([0, 0, 0, 0, 0, 0, 0, 0], dtype="int32"), - "edge_id": [5, 10, 15, 20, 25, 30, 35, 40], - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 2], dtype="int32"), - } - ) - map = cudf.Series(m, name="map") - bogus_samples = bogus_samples.join(map, how="outer").sort_index() - - tempdir = tempfile.TemporaryDirectory() - for s in range(256): - bogus_samples["batch_id"] = cupy.int32(s) - bogus_samples.to_parquet(os.path.join(tempdir.name, f"batch={s}-{s}.parquet")) - - loader = BulkSampleLoader( - feature_store=cugraph_store, - graph_store=cugraph_store, - directory=tempdir, - input_files=list(os.listdir(tempdir.name))[100:200], - ) - - num_samples = 0 - for sample in loader: - num_samples += 1 - assert sample["t0"]["num_nodes"] == 7 - # correct vertex order is [0, 1, 2, 6, 4, 3, 5]; x = [1, 2, 3, 7, 5, 4, 6] - assert sample["t0"]["x"].tolist() == [3, 4, 5, 6, 7, 8, 9] - - edge_index = sample[("t0", "knows", "t0")]["edge_index"] - assert list(edge_index.shape) == [2, 8] - - assert ( - edge_index[0].tolist() == bogus_samples.majors.dropna().values_host.tolist() - ) - assert ( - edge_index[1].tolist() == 
bogus_samples.minors.dropna().values_host.tolist() - ) - - assert num_samples == 100 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif(not HAS_TORCH_SPARSE, reason="torch-sparse not available") -@pytest.mark.sg -def test_cugraph_loader_from_disk_subset_csr(): - m = [2, 9, 99, 82, 11, 13] - n = torch.arange(1, 1 + len(m), dtype=torch.int32) - x = torch.zeros(256, dtype=torch.int32) - x[torch.tensor(m, dtype=torch.int32)] = n - F = FeatureStore() - F.add_data(x, "t0", "x") - - G = {("t0", "knows", "t0"): 9080} - N = {"t0": 256} - - cugraph_store = DaskGraphStore(F, G, N) - - bogus_samples = cudf.DataFrame( - { - "major_offsets": [0, 3, 5, 7, 8, None, None, None], - "minors": [1, 2, 3, 0, 3, 4, 5, 1], - "edge_type": cudf.Series([0, 0, 0, 0, 0, 0, 0, 0], dtype="int32"), - "edge_id": [5, 10, 15, 20, 25, 30, 35, 40], - "label_hop_offsets": cudf.Series( - [0, 1, 4, None, None, None, None, None], dtype="int32" - ), - "renumber_map_offsets": cudf.Series([0, 6], dtype="int32"), - } - ) - map = cudf.Series(m, name="map") - bogus_samples["map"] = map - - tempdir = tempfile.TemporaryDirectory() - for s in range(256): - # offset the offsets - bogus_samples["batch_id"] = cupy.int32(s) - bogus_samples.to_parquet(os.path.join(tempdir.name, f"batch={s}-{s}.parquet")) - - loader = BulkSampleLoader( - feature_store=cugraph_store, - graph_store=cugraph_store, - directory=tempdir, - input_files=list(os.listdir(tempdir.name))[100:200], - ) - - num_samples = 0 - for sample in loader: - num_samples += 1 - assert sample["t0"]["num_nodes"] == 6 - - assert sample["t0"]["x"].tolist() == [1, 2, 3, 4, 5, 6] - - edge_index = sample[("t0", "knows", "t0")]["adj_t"] - assert edge_index.size(0) == 4 - assert edge_index.size(1) == 6 - - colptr, row, _ = edge_index.csr() - - assert ( - colptr.tolist() == bogus_samples.major_offsets.dropna().values_host.tolist() - ) - assert row.tolist() == bogus_samples.minors.dropna().values_host.tolist() - - 
assert sample["t0"]["num_sampled_nodes"] == [1, 3, 2] - assert sample["t0", "knows", "t0"]["num_sampled_edges"] == [3, 5] - - assert num_samples == 100 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_cugraph_loader_e2e_coo(): - m = [2, 9, 99, 82, 9, 3, 18, 1, 12] - x = torch.randint(3000, (256, 256)).to(torch.float32) - F = FeatureStore() - F.add_data(x, "t0", "x") - - G = {("t0", "knows", "t0"): 9999} - N = {"t0": 256} - - cugraph_store = DaskGraphStore(F, G, N, order="CSR") - - bogus_samples = cudf.DataFrame( - { - "majors": [0, 1, 2, 3, 4, 5, 6, 6], - "minors": [5, 4, 3, 2, 2, 6, 5, 2], - "edge_type": cudf.Series([0, 0, 0, 0, 0, 0, 0, 0], dtype="int32"), - "edge_id": [5, 10, 15, 20, 25, 30, 35, 40], - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 2], dtype="int32"), - } - ) - map = cudf.Series(m, name="map") - bogus_samples = bogus_samples.join(map, how="outer").sort_index() - - tempdir = tempfile.TemporaryDirectory() - for s in range(256): - bogus_samples["batch_id"] = cupy.int32(s) - bogus_samples.to_parquet(os.path.join(tempdir.name, f"batch={s}-{s}.parquet")) - - loader = BulkSampleLoader( - feature_store=cugraph_store, - graph_store=cugraph_store, - directory=tempdir, - input_files=list(os.listdir(tempdir.name))[100:200], - ) - - convs = [ - torch_geometric.nn.SAGEConv(256, 64, aggr="mean").cuda(), - torch_geometric.nn.SAGEConv(64, 8, aggr="mean").cuda(), - torch_geometric.nn.SAGEConv(8, 1, aggr="mean").cuda(), - ] - - trim = trim_to_layer.TrimToLayer() - relu = torch.nn.functional.relu - dropout = torch.nn.functional.dropout - - for hetero_data in loader: - ei = hetero_data["t0", "knows", "t0"]["edge_index"] - x = hetero_data["t0"]["x"].cuda() - num_sampled_nodes = hetero_data["t0"]["num_sampled_nodes"] - num_sampled_edges = hetero_data["t0", "knows", "t0"]["num_sampled_edges"] - - for i in range(len(convs)): - x, ei, _ = trim(i, num_sampled_nodes, num_sampled_edges, x, ei, None) - - s = 
x.shape[0] - - x = convs[i](x, ei, size=(s, s)) - x = relu(x) - x = dropout(x, p=0.5) - - x = x.narrow(dim=0, start=0, length=x.shape[0] - num_sampled_nodes[1]) - - assert list(x.shape) == [3, 1] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skipif(not HAS_TORCH_SPARSE, reason="torch-sparse not available") -@pytest.mark.parametrize("framework", ["pyg", "cugraph-ops"]) -@pytest.mark.sg -def test_cugraph_loader_e2e_csc(framework: str): - m = [2, 9, 99, 82, 9, 3, 18, 1, 12] - x = torch.randint(3000, (256, 256)).to(torch.float32) - F = FeatureStore() - F.add_data(x, "t0", "x") - - G = {("t0", "knows", "t0"): 9999} - N = {"t0": 256} - - cugraph_store = DaskGraphStore(F, G, N) - - bogus_samples = cudf.DataFrame( - { - "major_offsets": [0, 3, 5, 7, 8, None, None, None], - "minors": [1, 2, 3, 0, 3, 4, 5, 1], - "edge_type": cudf.Series([0, 0, 0, 0, 0, 0, 0, 0], dtype="int32"), - "edge_id": [5, 10, 15, 20, 25, 30, 35, 40], - "label_hop_offsets": cudf.Series( - [0, 1, 4, None, None, None, None, None], dtype="int32" - ), - "renumber_map_offsets": cudf.Series([0, 6], dtype="int32"), - } - ) - map = cudf.Series(m, name="map") - bogus_samples = bogus_samples.join(map, how="outer").sort_index() - - tempdir = tempfile.TemporaryDirectory() - for s in range(256): - bogus_samples["batch_id"] = cupy.int32(s) - bogus_samples.to_parquet(os.path.join(tempdir.name, f"batch={s}-{s}.parquet")) - - loader = BulkSampleLoader( - feature_store=cugraph_store, - graph_store=cugraph_store, - directory=tempdir, - input_files=list(os.listdir(tempdir.name))[100:200], - ) - - if framework == "pyg": - convs = [ - torch_geometric.nn.SAGEConv(256, 64, aggr="mean").cuda(), - torch_geometric.nn.SAGEConv(64, 1, aggr="mean").cuda(), - ] - else: - convs = [ - CuGraphSAGEConv(256, 64, aggr="mean").cuda(), - CuGraphSAGEConv(64, 1, aggr="mean").cuda(), - ] - - trim = trim_to_layer.TrimToLayer() - relu = torch.nn.functional.relu - dropout = 
torch.nn.functional.dropout - - for hetero_data in loader: - x = hetero_data["t0"]["x"].cuda() - - if framework == "pyg": - ei = hetero_data["t0", "knows", "t0"]["adj_t"].coo() - ei = torch.stack((ei[0], ei[1])) - else: - ei = hetero_data["t0", "knows", "t0"]["adj_t"].csr() - ei = [ei[1], ei[0], x.shape[0]] - - num_sampled_nodes = hetero_data["t0"]["num_sampled_nodes"] - num_sampled_edges = hetero_data["t0", "knows", "t0"]["num_sampled_edges"] - - s = x.shape[0] - for i in range(len(convs)): - if framework == "pyg": - x, ei, _ = trim(i, num_sampled_nodes, num_sampled_edges, x, ei, None) - else: - if i > 0: - x = x.narrow( - dim=0, - start=0, - length=s - num_sampled_nodes[-i], - ) - - ei[0] = ei[0].narrow( - dim=0, - start=0, - length=ei[0].size(0) - num_sampled_edges[-i], - ) - ei[1] = ei[1].narrow( - dim=0, start=0, length=ei[1].size(0) - num_sampled_nodes[-i] - ) - ei[2] = x.size(0) - - s = x.shape[0] - - if framework == "pyg": - x = convs[i](x, ei, size=(s, s)) - else: - x = convs[i](x, ei) - x = relu(x) - x = dropout(x, p=0.5) - - x = x.narrow(dim=0, start=0, length=s - num_sampled_nodes[1]) - - assert list(x.shape) == [1, 1] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.parametrize("drop_last", [True, False]) -@pytest.mark.sg -def test_drop_last(drop_last): - N = {"N": 10} - G = { - ("N", "e", "N"): torch.stack( - [torch.tensor([0, 1, 2, 3, 4]), torch.tensor([5, 6, 7, 8, 9])] - ) - } - F = FeatureStore(backend="torch") - F.add_data(torch.arange(10), "N", "z") - - store = DaskGraphStore(F, G, N) - with tempfile.TemporaryDirectory() as dir: - loader = DaskNeighborLoader( - (store, store), - input_nodes=torch.tensor([0, 1, 2, 3, 4]), - num_neighbors=[1], - batch_size=2, - shuffle=False, - drop_last=drop_last, - batches_per_partition=1, - directory=dir, - ) - - t = torch.tensor([]) - for batch in loader: - t = torch.concat([t, batch["N"].z]) - - t = t.tolist() - - files = os.listdir(dir) - assert len(files) 
== 2 if drop_last else 3 - assert "batch=0-0.parquet" in files - assert "batch=1-1.parquet" in files - if not drop_last: - assert "batch=2-2.parquet" in files - - -@pytest.mark.parametrize("directory", ["local", "temp"]) -@pytest.mark.sg -def test_load_directory( - karate_gnn: Tuple[ - FeatureStore, Dict[Tuple[str, str, str], np.ndarray], Dict[str, int] - ], - directory: str, -): - if directory == "local": - local_dir = tempfile.TemporaryDirectory(dir=".") - - cugraph_store = DaskGraphStore(*karate_gnn) - cugraph_loader = DaskNeighborLoader( - (cugraph_store, cugraph_store), - torch.arange(8, dtype=torch.int64), - 2, - num_neighbors=[8, 4, 2], - random_state=62, - replace=False, - directory=None if directory == "temp" else local_dir.name, - batches_per_partition=1, - ) - - it = iter(cugraph_loader) - next_batch = next(it) - assert next_batch is not None - - if directory == "local": - assert len(os.listdir(local_dir.name)) == 4 - - count = 1 - while next(it, None) is not None: - count += 1 - - assert count == 4 diff --git a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_dask_neighbor_loader_mg.py b/python/cugraph-pyg/cugraph_pyg/tests/loader/test_dask_neighbor_loader_mg.py deleted file mode 100644 index 9e8a85a5b67..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_dask_neighbor_loader_mg.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from cugraph_pyg.loader import DaskNeighborLoader -from cugraph_pyg.data import DaskGraphStore -from cugraph.utilities.utils import import_optional, MissingModule - -torch = import_optional("torch") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_cugraph_loader_basic(dask_client, karate_gnn): - F, G, N = karate_gnn - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True, order="CSR") - loader = DaskNeighborLoader( - (cugraph_store, cugraph_store), - torch.arange(N["type0"] + N["type1"], dtype=torch.int64), - 10, - num_neighbors=[4, 4], - random_state=62, - replace=False, - ) - - assert isinstance(cugraph_store._subgraph()._plc_graph, dict) - - samples = [s for s in loader] - - assert len(samples) == 3 - for sample in samples: - if "type0" in sample: - for prop in sample["type0"]["prop0"].tolist(): - assert prop % 31 == 0 - - if "type1" in sample: - for prop in sample["type1"]["prop0"].tolist(): - assert prop % 41 == 0 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_cugraph_loader_hetero(dask_client, karate_gnn): - F, G, N = karate_gnn - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True, order="CSR") - loader = DaskNeighborLoader( - (cugraph_store, cugraph_store), - input_nodes=("type1", torch.tensor([0, 1, 2, 5], device="cuda")), - batch_size=2, - num_neighbors=[4, 4], - random_state=62, - replace=False, - ) - - samples = [s for s in loader] - - assert len(samples) == 2 - for sample in samples: - print(sample) - if "type0" in sample: - for prop in sample["type0"]["prop0"].tolist(): - assert prop % 31 == 0 - - if "type1" in sample: - for prop in sample["type1"]["prop0"].tolist(): - assert prop % 41 == 0 diff --git a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader.py b/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader.py deleted file mode 100644 index 8ee18a826f7..00000000000 
--- a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from cugraph.datasets import karate -from cugraph.utilities.utils import import_optional, MissingModule - -import cugraph_pyg -from cugraph_pyg.data import TensorDictFeatureStore, GraphStore -from cugraph_pyg.loader import NeighborLoader - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_neighbor_loader(): - """ - Basic e2e test that covers loading and sampling. 
- """ - - df = karate.get_edgelist() - src = torch.as_tensor(df["src"], device="cuda") - dst = torch.as_tensor(df["dst"], device="cuda") - - ei = torch.stack([dst, src]) - - graph_store = GraphStore() - graph_store.put_edge_index(ei, ("person", "knows", "person"), "coo") - - feature_store = TensorDictFeatureStore() - feature_store["person", "feat"] = torch.randint(128, (34, 16)) - - loader = NeighborLoader( - (feature_store, graph_store), - [5, 5], - input_nodes=torch.arange(34), - ) - - for batch in loader: - assert isinstance(batch, torch_geometric.data.Data) - assert (feature_store["person", "feat"][batch.n_id] == batch.feat).all() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_neighbor_loader_biased(): - eix = torch.tensor( - [ - [3, 4, 5], - [0, 1, 2], - ] - ) - - graph_store = GraphStore() - graph_store.put_edge_index(eix, ("person", "knows", "person"), "coo") - - feature_store = TensorDictFeatureStore() - feature_store["person", "feat"] = torch.randint(128, (6, 12)) - feature_store[("person", "knows", "person"), "bias"] = torch.tensor( - [0, 12, 14], dtype=torch.float32 - ) - - loader = NeighborLoader( - (feature_store, graph_store), - [1], - input_nodes=torch.tensor([0, 1, 2], dtype=torch.int64), - batch_size=3, - weight_attr="bias", - ) - - out = list(iter(loader)) - assert len(out) == 1 - out = out[0] - - assert out.edge_index.shape[1] == 2 - assert (out.edge_index.cpu() == torch.tensor([[3, 4], [1, 2]])).all() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -@pytest.mark.parametrize("num_nodes", [10, 25]) -@pytest.mark.parametrize("num_edges", [64, 128]) -@pytest.mark.parametrize("batch_size", [2, 4]) -@pytest.mark.parametrize("select_edges", [16, 32]) -@pytest.mark.parametrize("depth", [1, 3]) -@pytest.mark.parametrize("num_neighbors", [1, 4]) -def test_link_neighbor_loader_basic( - num_nodes, num_edges, batch_size, 
select_edges, num_neighbors, depth -): - graph_store = GraphStore() - feature_store = TensorDictFeatureStore() - - eix = torch.randperm(num_edges)[:select_edges] - graph_store[("n", "e", "n"), "coo"] = torch.stack( - [ - torch.randint(0, num_nodes, (num_edges,)), - torch.randint(0, num_nodes, (num_edges,)), - ] - ) - - elx = graph_store[("n", "e", "n"), "coo"][:, eix] - loader = cugraph_pyg.loader.LinkNeighborLoader( - (feature_store, graph_store), - num_neighbors=[num_neighbors] * depth, - edge_label_index=elx, - batch_size=batch_size, - shuffle=False, - ) - - elx = torch.tensor_split(elx, eix.numel() // batch_size, dim=1) - for i, batch in enumerate(loader): - assert ( - batch.input_id.cpu() == torch.arange(i * batch_size, (i + 1) * batch_size) - ).all() - assert (elx[i] == batch.n_id[batch.edge_label_index.cpu()]).all() - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -@pytest.mark.parametrize("batch_size", [1, 2]) -def test_link_neighbor_loader_negative_sampling_basic(batch_size): - num_edges = 62 - num_nodes = 19 - select_edges = 17 - - graph_store = GraphStore() - feature_store = TensorDictFeatureStore() - - eix = torch.randperm(num_edges)[:select_edges] - graph_store[("n", "e", "n"), "coo"] = torch.stack( - [ - torch.randint(0, num_nodes, (num_edges,)), - torch.randint(0, num_nodes, (num_edges,)), - ] - ) - - elx = graph_store[("n", "e", "n"), "coo"][:, eix] - loader = cugraph_pyg.loader.LinkNeighborLoader( - (feature_store, graph_store), - num_neighbors=[3, 3, 3], - edge_label_index=elx, - batch_size=batch_size, - neg_sampling="binary", - shuffle=False, - ) - - elx = torch.tensor_split(elx, eix.numel() // batch_size, dim=1) - for i, batch in enumerate(loader): - assert batch.edge_label[0] == 1.0 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -@pytest.mark.parametrize("batch_size", [1, 2]) -def 
test_link_neighbor_loader_negative_sampling_uneven(batch_size): - num_edges = 62 - num_nodes = 19 - select_edges = 17 - - graph_store = GraphStore() - feature_store = TensorDictFeatureStore() - - eix = torch.randperm(num_edges)[:select_edges] - graph_store[("n", "e", "n"), "coo"] = torch.stack( - [ - torch.randint(0, num_nodes, (num_edges,)), - torch.randint(0, num_nodes, (num_edges,)), - ] - ) - - elx = graph_store[("n", "e", "n"), "coo"][:, eix] - loader = cugraph_pyg.loader.LinkNeighborLoader( - (feature_store, graph_store), - num_neighbors=[3, 3, 3], - edge_label_index=elx, - batch_size=batch_size, - neg_sampling=torch_geometric.sampler.NegativeSampling("binary", amount=0.1), - shuffle=False, - ) - - elx = torch.tensor_split(elx, eix.numel() // batch_size, dim=1) - for i, batch in enumerate(loader): - assert batch.edge_label[0] == 1.0 diff --git a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader_mg.py b/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader_mg.py deleted file mode 100644 index d1dee01a508..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader_mg.py +++ /dev/null @@ -1,364 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -import os - -from cugraph.datasets import karate -from cugraph.utilities.utils import import_optional, MissingModule - -from cugraph_pyg.data import TensorDictFeatureStore, GraphStore -from cugraph_pyg.loader import NeighborLoader, LinkNeighborLoader - -from cugraph.gnn import ( - cugraph_comms_init, - cugraph_comms_shutdown, - cugraph_comms_create_unique_id, -) - -os.environ["RAPIDS_NO_INITIALIZE"] = "1" - -torch = import_optional("torch") -torch_geometric = import_optional("torch_geometric") - - -def init_pytorch_worker(rank, world_size, cugraph_id): - import rmm - - rmm.reinitialize( - devices=rank, - pool_allocator=False, - ) - - import cupy - - cupy.cuda.Device(rank).use() - from rmm.allocators.cupy import rmm_cupy_allocator - - cupy.cuda.set_allocator(rmm_cupy_allocator) - - from cugraph.testing.mg_utils import enable_spilling - - enable_spilling() - - torch.cuda.set_device(rank) - - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "12355" - torch.distributed.init_process_group("nccl", rank=rank, world_size=world_size) - - cugraph_comms_init(rank=rank, world_size=world_size, uid=cugraph_id, device=rank) - - -def run_test_neighbor_loader_mg(rank, uid, world_size, specify_size): - """ - Basic e2e test that covers loading and sampling. 
- """ - init_pytorch_worker(rank, world_size, uid) - - df = karate.get_edgelist() - src = torch.as_tensor(df["src"], device="cuda") - dst = torch.as_tensor(df["dst"], device="cuda") - - ei = torch.stack([dst, src]) - ei = torch.tensor_split(ei.clone(), world_size, axis=1)[rank] - - sz = (34, 34) if specify_size else None - graph_store = GraphStore(is_multi_gpu=True) - graph_store.put_edge_index(ei, ("person", "knows", "person"), "coo", False, sz) - - feature_store = TensorDictFeatureStore() - feature_store["person", "feat"] = torch.randint(128, (34, 16)) - - ix_train = torch.tensor_split(torch.arange(34), world_size, axis=0)[rank] - - loader = NeighborLoader( - (feature_store, graph_store), - [5, 5], - input_nodes=ix_train, - ) - - for batch in loader: - assert isinstance(batch, torch_geometric.data.Data) - assert (feature_store["person", "feat"][batch.n_id] == batch.feat).all() - - cugraph_comms_shutdown() - - -@pytest.mark.skip(reason="deleteme") -@pytest.mark.parametrize("specify_size", [True, False]) -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_neighbor_loader_mg(specify_size): - uid = cugraph_comms_create_unique_id() - world_size = torch.cuda.device_count() - - torch.multiprocessing.spawn( - run_test_neighbor_loader_mg, - args=( - uid, - world_size, - specify_size, - ), - nprocs=world_size, - ) - - -def run_test_neighbor_loader_biased_mg(rank, uid, world_size): - init_pytorch_worker(rank, world_size, uid) - - eix = torch.stack( - [ - torch.arange( - 3 * (world_size + rank), - 3 * (world_size + rank + 1), - dtype=torch.int64, - device="cuda", - ), - torch.arange(3 * rank, 3 * (rank + 1), dtype=torch.int64, device="cuda"), - ] - ) - - graph_store = GraphStore(is_multi_gpu=True) - graph_store.put_edge_index(eix, ("person", "knows", "person"), "coo") - - feature_store = TensorDictFeatureStore() - feature_store["person", "feat"] = torch.randint(128, (6 * world_size, 12)) - feature_store[("person", 
"knows", "person"), "bias"] = torch.concat( - [torch.tensor([0, 1, 1], dtype=torch.float32) for _ in range(world_size)] - ) - - loader = NeighborLoader( - (feature_store, graph_store), - [1], - input_nodes=torch.arange( - 3 * rank, 3 * (rank + 1), dtype=torch.int64, device="cuda" - ), - batch_size=3, - weight_attr="bias", - ) - - out = list(iter(loader)) - assert len(out) == 1 - out = out[0] - - assert ( - out.edge_index.cpu() - == torch.tensor( - [ - [3, 4], - [1, 2], - ] - ) - ).all() - - cugraph_comms_shutdown() - - -@pytest.mark.skip(reason="deleteme") -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_neighbor_loader_biased_mg(): - uid = cugraph_comms_create_unique_id() - world_size = torch.cuda.device_count() - - torch.multiprocessing.spawn( - run_test_neighbor_loader_biased_mg, - args=( - uid, - world_size, - ), - nprocs=world_size, - ) - - -def run_test_link_neighbor_loader_basic_mg( - rank, - uid, - world_size, - num_nodes: int, - num_edges: int, - select_edges: int, - batch_size: int, - num_neighbors: int, - depth: int, -): - init_pytorch_worker(rank, world_size, uid) - - graph_store = GraphStore(is_multi_gpu=True) - feature_store = TensorDictFeatureStore() - - eix = torch.randperm(num_edges)[:select_edges] - graph_store[("n", "e", "n"), "coo"] = torch.stack( - [ - torch.randint(0, num_nodes, (num_edges,)), - torch.randint(0, num_nodes, (num_edges,)), - ] - ) - - elx = graph_store[("n", "e", "n"), "coo"][:, eix] - loader = LinkNeighborLoader( - (feature_store, graph_store), - num_neighbors=[num_neighbors] * depth, - edge_label_index=elx, - batch_size=batch_size, - shuffle=False, - ) - - elx = torch.tensor_split(elx, eix.numel() // batch_size, dim=1) - for i, batch in enumerate(loader): - assert ( - batch.input_id.cpu() == torch.arange(i * batch_size, (i + 1) * batch_size) - ).all() - assert (elx[i] == batch.n_id[batch.edge_label_index.cpu()]).all() - - cugraph_comms_shutdown() - - 
-@pytest.mark.skip(reason="deleteme") -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -@pytest.mark.parametrize("select_edges", [64, 128]) -@pytest.mark.parametrize("batch_size", [2, 4]) -@pytest.mark.parametrize("depth", [1, 3]) -def test_link_neighbor_loader_basic_mg(select_edges, batch_size, depth): - num_nodes = 25 - num_edges = 128 - num_neighbors = 2 - - uid = cugraph_comms_create_unique_id() - world_size = torch.cuda.device_count() - - torch.multiprocessing.spawn( - run_test_link_neighbor_loader_basic_mg, - args=( - uid, - world_size, - num_nodes, - num_edges, - select_edges, - batch_size, - num_neighbors, - depth, - ), - nprocs=world_size, - ) - - -def run_test_link_neighbor_loader_uneven_mg(rank, uid, world_size, edge_index): - init_pytorch_worker(rank, world_size, uid) - - graph_store = GraphStore(is_multi_gpu=True) - feature_store = TensorDictFeatureStore() - - batch_size = 1 - graph_store[("n", "e", "n"), "coo"] = torch.tensor_split( - edge_index, world_size, dim=-1 - )[rank] - - elx = graph_store[("n", "e", "n"), "coo"] # select all edges on each worker - loader = LinkNeighborLoader( - (feature_store, graph_store), - num_neighbors=[2, 2, 2], - edge_label_index=elx, - batch_size=batch_size, - shuffle=False, - ) - - for i, batch in enumerate(loader): - assert ( - batch.input_id.cpu() == torch.arange(i * batch_size, (i + 1) * batch_size) - ).all() - - assert (elx[:, [i]] == batch.n_id[batch.edge_label_index.cpu()]).all() - - cugraph_comms_shutdown() - - -@pytest.mark.skip(reason="deleteme") -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_link_neighbor_loader_uneven_mg(): - edge_index = torch.tensor( - [ - [0, 1, 3, 4, 7], - [1, 0, 8, 9, 12], - ] - ) - - uid = cugraph_comms_create_unique_id() - world_size = torch.cuda.device_count() - - torch.multiprocessing.spawn( - run_test_link_neighbor_loader_uneven_mg, - args=( - uid, - world_size, - 
edge_index, - ), - nprocs=world_size, - ) - - -def run_test_link_neighbor_loader_negative_sampling_basic_mg( - rank, world_size, uid, batch_size -): - num_edges = 62 * world_size - num_nodes = 19 * world_size - select_edges = 17 - - init_pytorch_worker(rank, world_size, uid) - - graph_store = GraphStore(is_multi_gpu=True) - feature_store = TensorDictFeatureStore() - - eix = torch.randperm(num_edges)[:select_edges] - graph_store[("n", "e", "n"), "coo"] = torch.stack( - [ - torch.randint(0, num_nodes, (num_edges,)), - torch.randint(0, num_nodes, (num_edges,)), - ] - ) - - elx = graph_store[("n", "e", "n"), "coo"][:, eix] - loader = LinkNeighborLoader( - (feature_store, graph_store), - num_neighbors=[3, 3, 3], - edge_label_index=elx, - batch_size=batch_size, - neg_sampling="binary", - shuffle=False, - ) - - elx = torch.tensor_split(elx, eix.numel() // batch_size, dim=1) - for i, batch in enumerate(loader): - assert batch.edge_label[0] == 1.0 - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -@pytest.mark.parametrize("batch_size", [1, 2]) -def test_link_neighbor_loader_negative_sampling_basic_mg(batch_size): - uid = cugraph_comms_create_unique_id() - world_size = torch.cuda.device_count() - - torch.multiprocessing.spawn( - run_test_link_neighbor_loader_negative_sampling_basic_mg, - args=( - world_size, - uid, - batch_size, - ), - nprocs=world_size, - ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py deleted file mode 100644 index 92d216fefa3..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from cugraph_pyg.nn import GATConv as CuGraphGATConv -from cugraph_pyg.utils.imports import package_available - -ATOL = 1e-6 - - -@pytest.mark.skipif( - package_available("torch_geometric<2.5"), reason="Test requires pyg>=2.5" -) -@pytest.mark.parametrize("use_edge_index", [True, False]) -@pytest.mark.parametrize("bias", [True, False]) -@pytest.mark.parametrize("bipartite", [True, False]) -@pytest.mark.parametrize("concat", [True, False]) -@pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) -@pytest.mark.parametrize("max_num_neighbors", [8, None]) -@pytest.mark.parametrize("use_edge_attr", [True, False]) -@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) -@pytest.mark.sg -def test_gat_conv_equality( - use_edge_index, - bias, - bipartite, - concat, - heads, - max_num_neighbors, - use_edge_attr, - graph, - request, -): - import torch - from torch_geometric import EdgeIndex - from torch_geometric.nn import GATConv - - torch.manual_seed(12345) - edge_index, size = request.getfixturevalue(graph) - edge_index = edge_index.cuda() - - if bipartite: - in_channels = (5, 3) - x = ( - torch.rand(size[0], in_channels[0]).cuda(), - torch.rand(size[1], in_channels[1]).cuda(), - ) - else: - in_channels = 5 - x = torch.rand(size[0], in_channels).cuda() - out_channels = 2 - - if use_edge_attr: - edge_dim = 3 - edge_attr = torch.rand(edge_index.size(1), edge_dim).cuda() - else: - edge_dim = edge_attr = None - - if use_edge_index: - csc = EdgeIndex(edge_index, sparse_size=size) - else: - if use_edge_attr: - csc, 
edge_attr_perm = CuGraphGATConv.to_csc( - edge_index, size, edge_attr=edge_attr - ) - else: - csc = CuGraphGATConv.to_csc(edge_index, size) - edge_attr_perm = None - - kwargs = dict(bias=bias, concat=concat, edge_dim=edge_dim) - - conv1 = GATConv( - in_channels, out_channels, heads, add_self_loops=False, **kwargs - ).cuda() - conv2 = CuGraphGATConv(in_channels, out_channels, heads, **kwargs).cuda() - - out_dim = heads * out_channels - with torch.no_grad(): - if bipartite: - conv2.lin_src.weight.copy_(conv1.lin_src.weight) - conv2.lin_dst.weight.copy_(conv1.lin_dst.weight) - else: - conv2.lin.weight.copy_(conv1.lin.weight) - - conv2.att[:out_dim].copy_(conv1.att_src.flatten()) - conv2.att[out_dim : 2 * out_dim].copy_(conv1.att_dst.flatten()) - if use_edge_attr: - conv2.att[2 * out_dim :].copy_(conv1.att_edge.flatten()) - conv2.lin_edge.weight.copy_(conv1.lin_edge.weight) - - out1 = conv1(x, edge_index, edge_attr=edge_attr) - if use_edge_index: - out2 = conv2(x, csc, edge_attr=edge_attr, max_num_neighbors=max_num_neighbors) - else: - out2 = conv2( - x, csc, edge_attr=edge_attr_perm, max_num_neighbors=max_num_neighbors - ) - assert torch.allclose(out1, out2, atol=ATOL) - - grad_output = torch.rand_like(out1) - out1.backward(grad_output) - out2.backward(grad_output) - - if bipartite: - assert torch.allclose( - conv1.lin_src.weight.grad, conv2.lin_src.weight.grad, atol=ATOL - ) - assert torch.allclose( - conv1.lin_dst.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL - ) - else: - assert torch.allclose(conv1.lin.weight.grad, conv2.lin.weight.grad, atol=ATOL) - - assert torch.allclose( - conv1.att_src.grad.flatten(), conv2.att.grad[:out_dim], atol=ATOL - ) - assert torch.allclose( - conv1.att_dst.grad.flatten(), conv2.att.grad[out_dim : 2 * out_dim], atol=ATOL - ) - - if use_edge_attr: - assert torch.allclose( - conv1.att_edge.grad.flatten(), conv2.att.grad[2 * out_dim :], atol=ATOL - ) - assert torch.allclose( - conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, 
atol=ATOL - ) - - if bias: - assert torch.allclose(conv1.bias.grad, conv2.bias.grad, atol=ATOL) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py deleted file mode 100644 index 2e221922add..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from cugraph_pyg.nn import GATv2Conv as CuGraphGATv2Conv - -ATOL = 1e-6 - - -@pytest.mark.parametrize("use_edge_index", [True, False]) -@pytest.mark.parametrize("bipartite", [True, False]) -@pytest.mark.parametrize("concat", [True, False]) -@pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) -@pytest.mark.parametrize("use_edge_attr", [True, False]) -@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) -@pytest.mark.sg -def test_gatv2_conv_equality( - use_edge_index, bipartite, concat, heads, use_edge_attr, graph, request -): - pytest.importorskip("torch_geometric", reason="PyG not available") - import torch - from torch_geometric import EdgeIndex - from torch_geometric.nn import GATv2Conv - - torch.manual_seed(12345) - edge_index, size = request.getfixturevalue(graph) - edge_index = edge_index.cuda() - - if bipartite: - in_channels = (5, 3) - x = ( - torch.rand(size[0], in_channels[0]).cuda(), - torch.rand(size[1], in_channels[1]).cuda(), - ) - else: - 
in_channels = 5 - x = torch.rand(size[0], in_channels).cuda() - out_channels = 2 - - if use_edge_attr: - edge_dim = 3 - edge_attr = torch.rand(edge_index.size(1), edge_dim).cuda() - else: - edge_dim = edge_attr = None - - if use_edge_index: - csc = EdgeIndex(edge_index, sparse_size=size) - else: - if use_edge_attr: - csc, edge_attr_perm = CuGraphGATv2Conv.to_csc( - edge_index, size, edge_attr=edge_attr - ) - else: - csc = CuGraphGATv2Conv.to_csc(edge_index, size) - edge_attr_perm = None - - kwargs = dict(bias=False, concat=concat, edge_dim=edge_dim) - - conv1 = GATv2Conv( - in_channels, out_channels, heads, add_self_loops=False, **kwargs - ).cuda() - conv2 = CuGraphGATv2Conv(in_channels, out_channels, heads, **kwargs).cuda() - - with torch.no_grad(): - conv2.lin_src.weight.copy_(conv1.lin_l.weight) - conv2.lin_dst.weight.copy_(conv1.lin_r.weight) - conv2.att.copy_(conv1.att.flatten()) - if use_edge_attr: - conv2.lin_edge.weight.copy_(conv1.lin_edge.weight) - - out1 = conv1(x, edge_index, edge_attr=edge_attr) - if use_edge_index: - out2 = conv2(x, csc, edge_attr=edge_attr) - else: - out2 = conv2(x, csc, edge_attr=edge_attr_perm) - assert torch.allclose(out1, out2, atol=ATOL) - - grad_output = torch.rand_like(out1) - out1.backward(grad_output) - out2.backward(grad_output) - - assert torch.allclose(conv1.lin_l.weight.grad, conv2.lin_src.weight.grad, atol=ATOL) - assert torch.allclose(conv1.lin_r.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL) - - assert torch.allclose(conv1.att.grad.flatten(), conv2.att.grad, atol=ATOL) - - if use_edge_attr: - assert torch.allclose( - conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=ATOL - ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py deleted file mode 100644 index f182869002a..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright (c) 2023-2024, 
NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from cugraph_pyg.nn import HeteroGATConv as CuGraphHeteroGATConv -from cugraph_pyg.utils.imports import package_available - -ATOL = 1e-6 - - -@pytest.mark.cugraph_ops -@pytest.mark.skipif( - package_available("torch_geometric<2.4"), reason="Test requires pyg>=2.4" -) -@pytest.mark.parametrize("heads", [1, 3, 10]) -@pytest.mark.parametrize("aggr", ["sum", "mean"]) -@pytest.mark.sg -def test_hetero_gat_conv_equality(sample_pyg_hetero_data, aggr, heads): - import torch - from torch_geometric.data import HeteroData - from torch_geometric.nn import HeteroConv, GATConv - - device = torch.device("cuda") - data = HeteroData(sample_pyg_hetero_data).to(device) - - in_channels_dict = {k: v.size(1) for k, v in data.x_dict.items()} - out_channels = 2 - - convs_dict = {} - kwargs1 = dict(heads=heads, add_self_loops=False, bias=False) - for edge_type in data.edge_types: - src_t, _, dst_t = edge_type - in_channels_src, in_channels_dst = data.x_dict[src_t].size(-1), data.x_dict[ - dst_t - ].size(-1) - if src_t == dst_t: - convs_dict[edge_type] = GATConv(in_channels_src, out_channels, **kwargs1) - else: - convs_dict[edge_type] = GATConv( - (in_channels_src, in_channels_dst), out_channels, **kwargs1 - ) - - conv1 = HeteroConv(convs_dict, aggr=aggr).to(device) - kwargs2 = dict( - heads=heads, - aggr=aggr, - node_types=data.node_types, - edge_types=data.edge_types, - bias=False, - ) - conv2 = 
CuGraphHeteroGATConv(in_channels_dict, out_channels, **kwargs2).to(device) - - # copy over linear and attention weights - w_src, w_dst = conv2.split_tensors(conv2.lin_weights, dim=0) - with torch.no_grad(): - for edge_type in conv2.edge_types: - src_t, _, dst_t = edge_type - if src_t == dst_t: - w_src[edge_type].copy_(conv1.convs[edge_type].lin.weight) - else: - w_src[edge_type].copy_(conv1.convs[edge_type].lin_src.weight) - if w_dst[edge_type] is not None: - w_dst[edge_type].copy_(conv1.convs[edge_type].lin_dst.weight) - - conv2.attn_weights[edge_type][: heads * out_channels].copy_( - conv1.convs[edge_type].att_src.flatten() - ) - conv2.attn_weights[edge_type][heads * out_channels :].copy_( - conv1.convs[edge_type].att_dst.flatten() - ) - - out1 = conv1(data.x_dict, data.edge_index_dict) - out2 = conv2(data.x_dict, data.edge_index_dict) - - for node_type in data.node_types: - assert torch.allclose(out1[node_type], out2[node_type], atol=ATOL) - - loss1 = 0 - loss2 = 0 - for node_type in data.node_types: - loss1 += out1[node_type].mean() - loss2 += out2[node_type].mean() - - loss1.backward() - loss2.backward() - - # check gradient w.r.t attention weights - out_dim = heads * out_channels - for edge_type in conv2.edge_types: - assert torch.allclose( - conv1.convs[edge_type].att_src.grad.flatten(), - conv2.attn_weights[edge_type].grad[:out_dim], - atol=ATOL, - ) - assert torch.allclose( - conv1.convs[edge_type].att_dst.grad.flatten(), - conv2.attn_weights[edge_type].grad[out_dim:], - atol=ATOL, - ) - - # check gradient w.r.t linear weights - grad_lin_weights_ref = dict.fromkeys(out1.keys()) - for node_t, (rels_as_src, rels_as_dst) in conv2.relations_per_ntype.items(): - grad_list = [] - for rel_t in rels_as_src: - src_type, _, dst_type = rel_t - if src_type == dst_type: - grad_list.append(conv1.convs[rel_t].lin.weight.grad.clone()) - else: - grad_list.append(conv1.convs[rel_t].lin_src.weight.grad.clone()) - for rel_t in rels_as_dst: - 
grad_list.append(conv1.convs[rel_t].lin_dst.weight.grad.clone()) - assert len(grad_list) > 0 - grad_lin_weights_ref[node_t] = torch.vstack(grad_list) - - for node_type in conv2.lin_weights: - assert torch.allclose( - grad_lin_weights_ref[node_type], - conv2.lin_weights[node_type].grad, - atol=ATOL, - ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py deleted file mode 100644 index 8b06cb2e180..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from cugraph_pyg.nn import RGCNConv as CuGraphRGCNConv - -ATOL = 1e-6 - - -@pytest.mark.parametrize("use_edge_index", [True, False]) -@pytest.mark.parametrize("aggr", ["add", "sum", "mean"]) -@pytest.mark.parametrize("bias", [True, False]) -@pytest.mark.parametrize("max_num_neighbors", [8, None]) -@pytest.mark.parametrize("num_bases", [1, 2, None]) -@pytest.mark.parametrize("root_weight", [True, False]) -@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) -@pytest.mark.sg -def test_rgcn_conv_equality( - use_edge_index, - aggr, - bias, - max_num_neighbors, - num_bases, - root_weight, - graph, - request, -): - pytest.importorskip("torch_geometric", reason="PyG not available") - import torch - from torch_geometric import EdgeIndex - from torch_geometric.nn import FastRGCNConv as RGCNConv - - torch.manual_seed(12345) - in_channels, out_channels, num_relations = (4, 2, 3) - kwargs = dict(aggr=aggr, bias=bias, num_bases=num_bases, root_weight=root_weight) - - edge_index, size = request.getfixturevalue(graph) - edge_index = edge_index.cuda() - edge_type = torch.randint(num_relations, (edge_index.size(1),)).cuda() - - if use_edge_index: - csc = EdgeIndex(edge_index, sparse_size=size) - else: - csc, edge_type_perm = CuGraphRGCNConv.to_csc(edge_index, size, edge_type) - - x = torch.rand(size[0], in_channels, device="cuda") - - conv1 = RGCNConv(in_channels, out_channels, num_relations, **kwargs).cuda() - conv2 = CuGraphRGCNConv(in_channels, out_channels, num_relations, **kwargs).cuda() - - with torch.no_grad(): - if root_weight: - conv2.weight[:-1].copy_(conv1.weight) - conv2.weight[-1].copy_(conv1.root) - else: - conv2.weight.copy_(conv1.weight) - if num_bases is not None: - conv2.comp.copy_(conv1.comp) - - out1 = conv1(x, edge_index, edge_type) - if use_edge_index: - out2 = conv2(x, csc, edge_type) - else: - out2 = conv2(x, csc, edge_type_perm, max_num_neighbors=max_num_neighbors) - assert torch.allclose(out1, out2, atol=ATOL) - 
- grad_out = torch.rand_like(out1) - out1.backward(grad_out) - out2.backward(grad_out) - - if root_weight: - assert torch.allclose(conv1.weight.grad, conv2.weight.grad[:-1], atol=ATOL) - assert torch.allclose(conv1.root.grad, conv2.weight.grad[-1], atol=ATOL) - else: - assert torch.allclose(conv1.weight.grad, conv2.weight.grad, atol=ATOL) - - if num_bases is not None: - assert torch.allclose(conv1.comp.grad, conv2.comp.grad, atol=ATOL) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py deleted file mode 100644 index 878ceff632a..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from cugraph_pyg.nn import SAGEConv as CuGraphSAGEConv - -ATOL = 1e-6 - - -@pytest.mark.parametrize("use_edge_index", [True, False]) -@pytest.mark.parametrize("aggr", ["sum", "mean", "min", "max"]) -@pytest.mark.parametrize("bias", [True, False]) -@pytest.mark.parametrize("bipartite", [True, False]) -@pytest.mark.parametrize("max_num_neighbors", [8, None]) -@pytest.mark.parametrize("normalize", [True, False]) -@pytest.mark.parametrize("root_weight", [True, False]) -@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) -@pytest.mark.sg -def test_sage_conv_equality( - use_edge_index, - aggr, - bias, - bipartite, - max_num_neighbors, - normalize, - root_weight, - graph, - request, -): - pytest.importorskip("torch_geometric", reason="PyG not available") - import torch - from torch_geometric import EdgeIndex - from torch_geometric.nn import SAGEConv - - torch.manual_seed(12345) - edge_index, size = request.getfixturevalue(graph) - edge_index = edge_index.cuda() - - if use_edge_index: - csc = EdgeIndex(edge_index, sparse_size=size) - else: - csc = CuGraphSAGEConv.to_csc(edge_index, size) - - if bipartite: - in_channels = (7, 3) - x = ( - torch.rand(size[0], in_channels[0]).cuda(), - torch.rand(size[1], in_channels[1]).cuda(), - ) - else: - in_channels = 5 - x = torch.rand(size[0], in_channels).cuda() - out_channels = 4 - - kwargs = dict(aggr=aggr, bias=bias, normalize=normalize, root_weight=root_weight) - - conv1 = SAGEConv(in_channels, out_channels, **kwargs).cuda() - conv2 = CuGraphSAGEConv(in_channels, out_channels, **kwargs).cuda() - - in_channels_src = conv2.in_channels_src - with torch.no_grad(): - conv2.lin.weight[:, :in_channels_src].copy_(conv1.lin_l.weight) - if root_weight: - conv2.lin.weight[:, in_channels_src:].copy_(conv1.lin_r.weight) - if bias: - conv2.lin.bias.copy_(conv1.lin_l.bias) - - out1 = conv1(x, edge_index) - out2 = conv2(x, csc, max_num_neighbors=max_num_neighbors) - assert torch.allclose(out1, out2, 
atol=ATOL) - - grad_out = torch.rand_like(out1) - out1.backward(grad_out) - out2.backward(grad_out) - - assert torch.allclose( - conv1.lin_l.weight.grad, - conv2.lin.weight.grad[:, :in_channels_src], - atol=ATOL, - ) - - if root_weight: - assert torch.allclose( - conv1.lin_r.weight.grad, - conv2.lin.weight.grad[:, in_channels_src:], - atol=ATOL, - ) - - if bias: - assert torch.allclose( - conv1.lin_l.bias.grad, - conv2.lin.bias.grad, - atol=ATOL, - ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py deleted file mode 100644 index d207a4d7947..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from cugraph_pyg.nn import TransformerConv as CuGraphTransformerConv - -ATOL = 1e-6 - - -@pytest.mark.parametrize("use_edge_index", [True, False]) -@pytest.mark.parametrize("use_edge_attr", [True, False]) -@pytest.mark.parametrize("bipartite", [True, False]) -@pytest.mark.parametrize("concat", [True, False]) -@pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) -@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) -@pytest.mark.sg -def test_transformer_conv_equality( - use_edge_index, use_edge_attr, bipartite, concat, heads, graph, request -): - pytest.importorskip("torch_geometric", reason="PyG not available") - import torch - from torch_geometric import EdgeIndex - from torch_geometric.nn import TransformerConv - - torch.manual_seed(12345) - edge_index, size = request.getfixturevalue(graph) - edge_index = edge_index.cuda() - - if bipartite: - in_channels = (5, 3) - x = ( - torch.rand(size[0], in_channels[0], device="cuda"), - torch.rand(size[1], in_channels[1], device="cuda"), - ) - else: - in_channels = 5 - x = torch.rand(size[0], in_channels, device="cuda") - out_channels = 2 - - if use_edge_attr: - edge_dim = 3 - edge_attr = torch.rand(edge_index.size(1), edge_dim).cuda() - else: - edge_dim = edge_attr = None - - if use_edge_index: - csc = EdgeIndex(edge_index, sparse_size=size) - else: - if use_edge_attr: - csc, edge_attr_perm = CuGraphTransformerConv.to_csc( - edge_index, size, edge_attr=edge_attr - ) - else: - csc = CuGraphTransformerConv.to_csc(edge_index, size) - edge_attr_perm = None - - kwargs = dict(concat=concat, bias=False, edge_dim=edge_dim, root_weight=False) - - conv1 = TransformerConv(in_channels, out_channels, heads, **kwargs).cuda() - conv2 = CuGraphTransformerConv(in_channels, out_channels, heads, **kwargs).cuda() - - with torch.no_grad(): - conv2.lin_query.weight.copy_(conv1.lin_query.weight) - conv2.lin_key.weight.copy_(conv1.lin_key.weight) - 
conv2.lin_value.weight.copy_(conv1.lin_value.weight) - conv2.lin_query.bias.copy_(conv1.lin_query.bias) - conv2.lin_key.bias.copy_(conv1.lin_key.bias) - conv2.lin_value.bias.copy_(conv1.lin_value.bias) - if use_edge_attr: - conv2.lin_edge.weight.copy_(conv1.lin_edge.weight) - - out1 = conv1(x, edge_index, edge_attr=edge_attr) - if use_edge_index: - out2 = conv2(x, csc, edge_attr=edge_attr) - else: - out2 = conv2(x, csc, edge_attr=edge_attr_perm) - - assert torch.allclose(out1, out2, atol=ATOL) - - grad_output = torch.rand_like(out1) - out1.backward(grad_output) - out2.backward(grad_output) - - assert torch.allclose( - conv1.lin_query.weight.grad, conv2.lin_query.weight.grad, atol=ATOL - ) - assert torch.allclose( - conv1.lin_key.weight.grad, conv2.lin_key.weight.grad, atol=ATOL - ) - assert torch.allclose( - conv1.lin_value.weight.grad, conv2.lin_value.weight.grad, atol=ATOL - ) - assert torch.allclose( - conv1.lin_query.bias.grad, conv2.lin_query.bias.grad, atol=ATOL - ) - assert torch.allclose(conv1.lin_key.bias.grad, conv2.lin_key.bias.grad, atol=ATOL) - assert torch.allclose( - conv1.lin_value.bias.grad, conv2.lin_value.bias.grad, atol=ATOL - ) - - if use_edge_attr: - assert torch.allclose( - conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=ATOL - ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/pytest.ini b/python/cugraph-pyg/cugraph_pyg/tests/pytest.ini deleted file mode 100644 index 7b0a9f29fb1..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -[pytest] -addopts = --tb=native diff --git a/python/cugraph-pyg/cugraph_pyg/tests/sampler/test_sampler_utils.py b/python/cugraph-pyg/cugraph_pyg/tests/sampler/test_sampler_utils.py deleted file mode 100644 index 7659fdc386f..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/sampler/test_sampler_utils.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import cudf -import cupy - -import pytest - -from cugraph_pyg.data import DaskGraphStore -from cugraph_pyg.sampler.sampler_utils import ( - _sampler_output_from_sampling_results_heterogeneous, -) - -from cugraph.utilities.utils import import_optional, MissingModule -from cugraph import uniform_neighbor_sample - -torch = import_optional("torch") - - -@pytest.mark.cugraph_ops -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_neighbor_sample(basic_graph_1): - F, G, N = basic_graph_1 - cugraph_store = DaskGraphStore(F, G, N, order="CSR") - - batches = cudf.DataFrame( - { - "start": cudf.Series([0, 1, 2, 3, 4], dtype="int64"), - "batch": cudf.Series(cupy.zeros(5, dtype="int32")), - } - ) - - sampling_results = uniform_neighbor_sample( - cugraph_store._subgraph(), - batches, - fanout_vals=[-1], - with_replacement=False, - with_edge_properties=True, - with_batch_ids=True, - random_state=62, - return_offsets=False, - use_legacy_names=False, - ).sort_values(by=["majors", "minors"]) - - out = _sampler_output_from_sampling_results_heterogeneous( - sampling_results=sampling_results, - renumber_map=None, - graph_store=cugraph_store, - metadata=torch.arange(6, dtype=torch.int64), - ) - - noi_groups = out.node - row_dict = out.row - col_dict = out.col - metadata = out.metadata - - assert metadata.tolist() == list(range(6)) - - for node_type, node_ids in noi_groups.items(): - 
actual_vertex_ids = torch.arange(N[node_type]) - - assert sorted(node_ids.tolist()) == actual_vertex_ids.tolist() - - assert ( - row_dict[("vt1", "pig", "vt1")].tolist() == G[("vt1", "pig", "vt1")][0].tolist() - ) - assert ( - col_dict[("vt1", "pig", "vt1")].tolist() == G[("vt1", "pig", "vt1")][1].tolist() - ) - - # check the hop dictionaries - assert len(out.num_sampled_nodes) == 1 - assert out.num_sampled_nodes["vt1"] == [4, 1] - - assert len(out.num_sampled_edges) == 1 - assert out.num_sampled_edges[("vt1", "pig", "vt1")] == [6] - - -@pytest.mark.cugraph_ops -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_neighbor_sample_multi_vertex(multi_edge_multi_vertex_graph_1): - F, G, N = multi_edge_multi_vertex_graph_1 - cugraph_store = DaskGraphStore(F, G, N, order="CSR") - - batches = cudf.DataFrame( - { - "start": cudf.Series([0, 1, 2, 3, 4], dtype="int64"), - "batch": cudf.Series(cupy.zeros(5, dtype="int32")), - } - ) - - sampling_results = uniform_neighbor_sample( - cugraph_store._subgraph(), - batches, - fanout_vals=[-1], - with_replacement=False, - with_edge_properties=True, - random_state=62, - return_offsets=False, - with_batch_ids=True, - use_legacy_names=False, - ).sort_values(by=["majors", "minors"]) - - out = _sampler_output_from_sampling_results_heterogeneous( - sampling_results=sampling_results, - renumber_map=None, - graph_store=cugraph_store, - metadata=torch.arange(6, dtype=torch.int64), - ) - - noi_groups = out.node - row_dict = out.row - col_dict = out.col - metadata = out.metadata - - assert metadata.tolist() == list(range(6)) - - for node_type, node_ids in noi_groups.items(): - actual_vertex_ids = torch.arange(N[node_type]) - - assert node_ids.tolist() == sorted(actual_vertex_ids.tolist()) - - for edge_type, ei in G.items(): - assert sorted(row_dict[edge_type].tolist()) == sorted(ei[0].tolist()) - assert sorted(col_dict[edge_type].tolist()) == sorted(ei[1].tolist()) - - # check the hop 
dictionaries - assert len(out.num_sampled_nodes) == 2 - assert out.num_sampled_nodes["black"] == [2, 0] - assert out.num_sampled_nodes["brown"] == [3, 0] - - assert len(out.num_sampled_edges) == 5 - assert out.num_sampled_edges[("brown", "horse", "brown")] == [2] - assert out.num_sampled_edges[("brown", "tortoise", "black")] == [3] - assert out.num_sampled_edges[("brown", "mongoose", "black")] == [2] - assert out.num_sampled_edges[("black", "cow", "brown")] == [2] - assert out.num_sampled_edges[("black", "snake", "black")] == [1] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.sg -def test_neighbor_sample_mock_sampling_results(abc_graph): - F, G, N = abc_graph - - graph_store = DaskGraphStore(F, G, N, order="CSR") - - # let 0, 1 be the start vertices, fanout = [2, 1, 2, 3] - mock_sampling_results = cudf.DataFrame( - { - "majors": cudf.Series([0, 0, 1, 2, 3, 3, 1, 3, 3, 3], dtype="int64"), - "minors": cudf.Series([2, 3, 3, 8, 1, 7, 3, 1, 5, 7], dtype="int64"), - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 3, 3, 3], dtype="int32"), - "edge_type": cudf.Series([0, 0, 0, 2, 1, 2, 0, 1, 2, 2], dtype="int32"), - } - ) - - out = _sampler_output_from_sampling_results_heterogeneous( - mock_sampling_results, None, graph_store, None - ) - - assert out.metadata is None - assert len(out.node) == 3 - assert out.node["A"].tolist() == [0, 1] - assert out.node["B"].tolist() == [0, 1] - assert out.node["C"].tolist() == [3, 2, 0] - - assert len(out.row) == 3 - assert len(out.col) == 3 - assert out.row[("A", "ab", "B")].tolist() == [0, 0, 1, 1] - assert out.col[("A", "ab", "B")].tolist() == [0, 1, 1, 1] - assert out.row[("B", "bc", "C")].tolist() == [0, 1, 1, 1] - assert out.col[("B", "bc", "C")].tolist() == [0, 1, 2, 1] - assert out.row[("B", "ba", "A")].tolist() == [1, 1] - assert out.col[("B", "ba", "A")].tolist() == [1, 1] - - assert len(out.num_sampled_nodes) == 3 - assert out.num_sampled_nodes["A"] == [2, 0, 0, 0, 0] - assert 
out.num_sampled_nodes["B"] == [0, 2, 0, 0, 0] - assert out.num_sampled_nodes["C"] == [0, 0, 2, 0, 1] - - assert len(out.num_sampled_edges) == 3 - assert out.num_sampled_edges[("A", "ab", "B")] == [3, 0, 1, 0] - assert out.num_sampled_edges[("B", "ba", "A")] == [0, 1, 0, 1] - assert out.num_sampled_edges[("B", "bc", "C")] == [0, 2, 0, 2] diff --git a/python/cugraph-pyg/cugraph_pyg/tests/sampler/test_sampler_utils_mg.py b/python/cugraph-pyg/cugraph_pyg/tests/sampler/test_sampler_utils_mg.py deleted file mode 100644 index 91e0668b3c1..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/sampler/test_sampler_utils_mg.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import cudf -import cupy - -import pytest - -from cugraph_pyg.data import DaskGraphStore -from cugraph_pyg.sampler.sampler_utils import ( - _sampler_output_from_sampling_results_heterogeneous, -) - -from cugraph.gnn import FeatureStore - -from cugraph.utilities.utils import import_optional, MissingModule -from cugraph.dask import uniform_neighbor_sample - -torch = import_optional("torch") - - -@pytest.mark.cugraph_ops -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_neighbor_sample(dask_client, basic_graph_1): - F, G, N = basic_graph_1 - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True, order="CSR") - - batches = cudf.DataFrame( - { - "start": cudf.Series([0, 1, 2, 3, 4], dtype="int64"), - "batch": cudf.Series(cupy.zeros(5, dtype="int32")), - } - ) - - sampling_results = ( - uniform_neighbor_sample( - cugraph_store._subgraph(), - batches, - with_batch_ids=True, - fanout_vals=[-1], - with_replacement=False, - with_edge_properties=True, - random_state=62, - return_offsets=False, - return_hops=True, - use_legacy_names=False, - ) - .compute() - .sort_values(by=["majors", "minors"]) - ) - - out = _sampler_output_from_sampling_results_heterogeneous( - sampling_results=sampling_results, - renumber_map=None, - graph_store=cugraph_store, - metadata=torch.arange(6, dtype=torch.int64), - ) - - noi_groups = out.node - row_dict = out.row - col_dict = out.col - metadata = out.metadata - - assert metadata.tolist() == list(range(6)) - - for node_type, node_ids in noi_groups.items(): - actual_vertex_ids = torch.arange(N[node_type]) - - assert sorted(node_ids.tolist()) == actual_vertex_ids.tolist() - - assert ( - row_dict[("vt1", "pig", "vt1")].tolist() == G[("vt1", "pig", "vt1")][0].tolist() - ) - assert ( - col_dict[("vt1", "pig", "vt1")].tolist() == G[("vt1", "pig", "vt1")][1].tolist() - ) - - # check the hop dictionaries - assert len(out.num_sampled_nodes) == 1 - assert out.num_sampled_nodes["vt1"] == [4, 1] 
- - assert len(out.num_sampled_edges) == 1 - assert out.num_sampled_edges[("vt1", "pig", "vt1")] == [6] - - -@pytest.mark.cugraph_ops -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.skip(reason="broken") -@pytest.mark.mg -def test_neighbor_sample_multi_vertex(dask_client, multi_edge_multi_vertex_graph_1): - F, G, N = multi_edge_multi_vertex_graph_1 - cugraph_store = DaskGraphStore(F, G, N, multi_gpu=True, order="CSR") - - batches = cudf.DataFrame( - { - "start": cudf.Series([0, 1, 2, 3, 4], dtype="int64"), - "batches": cudf.Series(cupy.zeros(5, dtype="int32")), - } - ) - - sampling_results = ( - uniform_neighbor_sample( - cugraph_store._subgraph(), - batches, - fanout_vals=[-1], - with_replacement=False, - with_edge_properties=True, - random_state=62, - return_offsets=False, - with_batch_ids=True, - use_legacy_names=False, - ) - .sort_values(by=["majors", "minors"]) - .compute() - ) - - out = _sampler_output_from_sampling_results_heterogeneous( - sampling_results=sampling_results, - renumber_map=None, - graph_store=cugraph_store, - metadata=torch.arange(6, dtype=torch.int64), - ) - - noi_groups = out.node - row_dict = out.row - col_dict = out.col - metadata = out.metadata - - assert metadata.tolist() == list(range(6)) - - for node_type, node_ids in noi_groups.items(): - actual_vertex_ids = torch.arange(N[node_type]) - - assert node_ids.tolist() == sorted(actual_vertex_ids.tolist()) - - for edge_type, ei in G.items(): - assert sorted(row_dict[edge_type].tolist()) == sorted(ei[0].tolist()) - assert sorted(col_dict[edge_type].tolist()) == sorted(ei[1].tolist()) - - # check the hop dictionaries - assert len(out.num_sampled_nodes) == 2 - assert out.num_sampled_nodes["black"].tolist() == [2, 0] - assert out.num_sampled_nodes["brown"].tolist() == [3, 0] - - assert len(out.num_sampled_edges) == 5 - assert out.num_sampled_edges[("brown", "horse", "brown")].tolist() == [2] - assert out.num_sampled_edges[("brown", "tortoise", 
"black")].tolist() == [3] - assert out.num_sampled_edges[("brown", "mongoose", "black")].tolist() == [2] - assert out.num_sampled_edges[("black", "cow", "brown")].tolist() == [2] - assert out.num_sampled_edges[("black", "snake", "black")].tolist() == [1] - - -@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -@pytest.mark.mg -def test_neighbor_sample_mock_sampling_results(dask_client): - N = { - "A": 2, # 0, 1 - "B": 3, # 2, 3, 4 - "C": 4, # 5, 6, 7, 8 - } - - G = { - # (0->2, 0->3, 1->3) - ("A", "ab", "B"): [ - torch.tensor([0, 0, 1], dtype=torch.int64), - torch.tensor([0, 1, 1], dtype=torch.int64), - ], - # (2->0, 2->1, 3->1, 4->0) - ("B", "ba", "A"): [ - torch.tensor([0, 0, 1, 2], dtype=torch.int64), - torch.tensor([0, 1, 1, 0], dtype=torch.int64), - ], - # (2->6, 2->8, 3->5, 3->7, 4->5, 4->8) - ("B", "bc", "C"): [ - torch.tensor([0, 0, 1, 1, 2, 2], dtype=torch.int64), - torch.tensor([1, 3, 0, 2, 0, 3], dtype=torch.int64), - ], - } - - F = FeatureStore() - F.add_data( - torch.tensor([3.2, 2.1], dtype=torch.float32), type_name="A", feat_name="prop1" - ) - - graph_store = DaskGraphStore(F, G, N, multi_gpu=True, order="CSR") - - # let 0, 1 be the start vertices, fanout = [2, 1, 2, 3] - mock_sampling_results = cudf.DataFrame( - { - "majors": cudf.Series([0, 0, 1, 2, 3, 3, 1, 3, 3, 3], dtype="int64"), - "minors": cudf.Series([2, 3, 3, 8, 1, 7, 3, 1, 5, 7], dtype="int64"), - "hop_id": cudf.Series([0, 0, 0, 1, 1, 1, 2, 3, 3, 3], dtype="int32"), - "edge_type": cudf.Series([0, 0, 0, 2, 1, 2, 0, 1, 2, 2], dtype="int32"), - } - ) - - out = _sampler_output_from_sampling_results_heterogeneous( - mock_sampling_results, None, graph_store, None - ) - - assert out.metadata is None - assert len(out.node) == 3 - assert out.node["A"].tolist() == [0, 1] - assert out.node["B"].tolist() == [0, 1] - assert out.node["C"].tolist() == [3, 2, 0] - - assert len(out.row) == 3 - assert len(out.col) == 3 - assert out.row[("A", "ab", "B")].tolist() == [0, 0, 1, 
1] - assert out.col[("A", "ab", "B")].tolist() == [0, 1, 1, 1] - assert out.row[("B", "bc", "C")].tolist() == [0, 1, 1, 1] - assert out.col[("B", "bc", "C")].tolist() == [0, 1, 2, 1] - assert out.row[("B", "ba", "A")].tolist() == [1, 1] - assert out.col[("B", "ba", "A")].tolist() == [1, 1] - - assert len(out.num_sampled_nodes) == 3 - assert out.num_sampled_nodes["A"] == [2, 0, 0, 0, 0] - assert out.num_sampled_nodes["B"] == [0, 2, 0, 0, 0] - assert out.num_sampled_nodes["C"] == [0, 0, 2, 0, 1] - - assert len(out.num_sampled_edges) == 3 - assert out.num_sampled_edges[("A", "ab", "B")] == [3, 0, 1, 0] - assert out.num_sampled_edges[("B", "ba", "A")] == [0, 1, 0, 1] - assert out.num_sampled_edges[("B", "bc", "C")] == [0, 2, 0, 2] diff --git a/python/cugraph-pyg/cugraph_pyg/tests/test_version.py b/python/cugraph-pyg/cugraph_pyg/tests/test_version.py deleted file mode 100644 index 4ea0f9875f5..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/tests/test_version.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -import cugraph_pyg - - -def test_version_constants_are_populated(): - # __git_commit__ will only be non-empty in a built distribution - assert isinstance(cugraph_pyg.__git_commit__, str) - - # __version__ should always be non-empty - assert isinstance(cugraph_pyg.__version__, str) - assert len(cugraph_pyg.__version__) > 0 diff --git a/python/cugraph-pyg/cugraph_pyg/utils/__init__.py b/python/cugraph-pyg/cugraph_pyg/utils/__init__.py deleted file mode 100644 index aeae6078111..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/utils/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/cugraph-pyg/cugraph_pyg/utils/imports.py b/python/cugraph-pyg/cugraph_pyg/utils/imports.py deleted file mode 100644 index 1cc865a1f35..00000000000 --- a/python/cugraph-pyg/cugraph_pyg/utils/imports.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from packaging.requirements import Requirement -from importlib import import_module - - -def package_available(requirement: str) -> bool: - """Check if a package is installed and meets the version requirement.""" - req = Requirement(requirement) - try: - pkg = import_module(req.name) - except ImportError: - return False - - if len(req.specifier) > 0: - if hasattr(pkg, "__version__"): - return pkg.__version__ in req.specifier - else: - return False - - return True diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml deleted file mode 100644 index a30cd375635..00000000000 --- a/python/cugraph-pyg/pyproject.toml +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
- -[build-system] - -requires = [ - "rapids-build-backend>=0.3.1,<0.4.0.dev0", - "setuptools>=61.0.0", - "wheel", -] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. -build-backend = "rapids_build_backend.build" - -[tool.pytest.ini_options] -testpaths = ["cugraph_pyg/tests"] - -[project] -name = "cugraph-pyg" -dynamic = ["version"] -description = "cugraph-pyg - PyG support for cuGraph massive-scale, ultra-fast GPU graph analytics." -authors = [ - { name = "NVIDIA Corporation" }, -] -license = { text = "Apache 2.0" } -requires-python = ">=3.10" -classifiers = [ - "Intended Audience :: Developers", - "Programming Language :: Python", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", -] -dependencies = [ - "cugraph==24.12.*,>=0.0.0a0", - "numba>=0.57", - "numpy>=1.23,<3.0a0", - "pylibcugraphops==24.12.*,>=0.0.0a0", -] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. - -[project.urls] -Homepage = "https://github.com/rapidsai/cugraph" -Documentation = "https://docs.rapids.ai/api/cugraph/stable/" - -[project.optional-dependencies] -test = [ - "pandas", - "pylibwholegraph==24.12.*,>=0.0.0a0", - "pytest", - "pytest-benchmark", - "pytest-cov", - "pytest-xdist", - "scipy", - "tensordict>=0.1.2", - "torch>=2.3", -] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
- -[tool.setuptools] -license-files = ["LICENSE"] - -[tool.setuptools.dynamic] -version = {file = "cugraph_pyg/VERSION"} - -[tool.setuptools.packages.find] -include = [ - "cugraph_pyg*", - "cugraph_pyg.*", -] - -[tool.rapids-build-backend] -build-backend = "setuptools.build_meta" -dependencies-file = "../../dependencies.yaml" -matrix-entry = "cuda_suffixed=true" diff --git a/python/cugraph-pyg/pytest.ini b/python/cugraph-pyg/pytest.ini deleted file mode 100644 index 07c4ffa0958..00000000000 --- a/python/cugraph-pyg/pytest.ini +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -[pytest] -addopts = - --benchmark-warmup=off - --benchmark-max-time=0 - --benchmark-min-rounds=1 - --benchmark-columns="mean, rounds" - --tb=native - ## do not run slow tests/benchmarks by default - -m "not slow" - -markers = - slow: slow-running tests/benchmarks - cugraph_ops: Tests requiring cugraph-ops - mg: Test MG code paths - number of gpu > 1 - sg: Test SG code paths and dask sg tests - number of gpu == 1 - -python_classes = - Bench* - Test* - -python_files = - bench_* - test_* - -python_functions = - bench_* - test_* diff --git a/python/cugraph-service/server/pyproject.toml b/python/cugraph-service/server/pyproject.toml index f388fd4c126..b9a20430d12 100644 --- a/python/cugraph-service/server/pyproject.toml +++ b/python/cugraph-service/server/pyproject.toml @@ -20,18 +20,18 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "cudf==24.12.*,>=0.0.0a0", - "cugraph-service-client==24.12.*,>=0.0.0a0", - "cugraph==24.12.*,>=0.0.0a0", + "cudf==25.2.*,>=0.0.0a0", + "cugraph-service-client==25.2.*,>=0.0.0a0", + "cugraph==25.2.*,>=0.0.0a0", "cupy-cuda11x>=12.0.0", - "dask-cuda==24.12.*,>=0.0.0a0", - "dask-cudf==24.12.*,>=0.0.0a0", + "dask-cuda==25.2.*,>=0.0.0a0", + "dask-cudf==25.2.*,>=0.0.0a0", "numba>=0.57", "numpy>=1.23,<3.0a0", - "rapids-dask-dependency==24.12.*,>=0.0.0a0", - "rmm==24.12.*,>=0.0.0a0", + "rapids-dask-dependency==25.2.*,>=0.0.0a0", + "rmm==25.2.*,>=0.0.0a0", "thriftpy2!=0.5.0,!=0.5.1", - "ucx-py==0.41.*,>=0.0.0a0", + "ucx-py==0.42.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ "Intended Audience :: Developers", diff --git a/python/cugraph/cugraph/cores/core_number.py b/python/cugraph/cugraph/cores/core_number.py index 0b411c2eed2..d84069ddec8 100644 --- a/python/cugraph/cugraph/cores/core_number.py +++ b/python/cugraph/cugraph/cores/core_number.py @@ -23,19 +23,16 @@ def core_number(G, degree_type="bidirectional"): """ Compute the core numbers for the nodes of the graph G. A k-core of a graph - is a maximal subgraph that contains nodes of degree k or more. - A node has a core number of k if it belongs a k-core but not to k+1-core. - This call does not support a graph with self-loops and parallel - edges. + is a maximal subgraph that contains nodes of degree k or more. A node has + a core number of k if it belongs to a k-core but not to k+1-core. This + call does not support a graph with self-loops and parallel edges. Parameters ---------- G : cuGraph.Graph or networkx.Graph - The graph should contain undirected edges where undirected edges are - represented as directed edges in both directions. While this graph - can contain edge weights, they don't participate in the calculation + The current implementation only supports undirected graphs. The graph + can contain edge weights, but they don't participate in the calculation of the core numbers. - The current implementation only supports undirected graphs. .. deprecated:: 24.12 Accepting a ``networkx.Graph`` is deprecated and will be removed in a @@ -43,9 +40,10 @@ def core_number(G, degree_type="bidirectional"): the ``nx-cugraph`` backend. See: https://rapids.ai/nx-cugraph/ degree_type: str, (default="bidirectional") - This option determines if the core number computation should be based - on input, output, or both directed edges, with valid values being - "incoming", "outgoing", and "bidirectional" respectively. + This option is currently ignored. 
This option may eventually determine + if the core number computation should be based on input, output, or + both directed edges, with valid values being "incoming", "outgoing", + and "bidirectional" respectively. Returns ------- @@ -63,7 +61,13 @@ def core_number(G, degree_type="bidirectional"): >>> from cugraph.datasets import karate >>> G = karate.get_graph(download=True) >>> df = cugraph.core_number(G) - + >>> df.head() + vertex core_number + 0 33 4 + 1 0 4 + 2 32 4 + 3 2 4 + 4 1 4 """ G, isNx = ensure_cugraph_obj_for_nx(G) @@ -71,11 +75,14 @@ def core_number(G, degree_type="bidirectional"): if G.is_directed(): raise ValueError("input graph must be undirected") - if degree_type not in ["incoming", "outgoing", "bidirectional"]: - raise ValueError( - f"'degree_type' must be either incoming, " - f"outgoing or bidirectional, got: {degree_type}" - ) + # degree_type is currently ignored until libcugraph supports directed + # graphs for core_number. Once supported, degree_type should be checked + # like so: + # if degree_type not in ["incoming", "outgoing", "bidirectional"]: + # raise ValueError( + # f"'degree_type' must be either incoming, " + # f"outgoing or bidirectional, got: {degree_type}" + # ) vertex, core_number = pylibcugraph_core_number( resource_handle=ResourceHandle(), diff --git a/python/cugraph/cugraph/dask/cores/core_number.py b/python/cugraph/cugraph/dask/cores/core_number.py index 4ae1fb547d1..3266348f735 100644 --- a/python/cugraph/cugraph/dask/cores/core_number.py +++ b/python/cugraph/cugraph/dask/cores/core_number.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -53,15 +53,15 @@ def core_number(input_graph, degree_type="bidirectional"): Parameters ---------- input_graph : cugraph.graph - cuGraph graph descriptor, should contain the connectivity information, - (edge weights are not used in this algorithm). - The current implementation only supports undirected graphs. + The current implementation only supports undirected graphs. The graph + can contain edge weights, but they don't participate in the calculation + of the core numbers. degree_type: str, (default="bidirectional") - This option determines if the core number computation should be based - on input, output, or both directed edges, with valid values being - "incoming", "outgoing", and "bidirectional" respectively. - + This option is currently ignored. This option may eventually determine + if the core number computation should be based on input, output, or + both directed edges, with valid values being "incoming", "outgoing", + and "bidirectional" respectively. Returns ------- @@ -77,11 +77,14 @@ def core_number(input_graph, degree_type="bidirectional"): if input_graph.is_directed(): raise ValueError("input graph must be undirected") - if degree_type not in ["incoming", "outgoing", "bidirectional"]: - raise ValueError( - f"'degree_type' must be either incoming, " - f"outgoing or bidirectional, got: {degree_type}" - ) + # degree_type is currently ignored until libcugraph supports directed + # graphs for core_number. 
Once supported, degree_type should be checked + # like so: + # if degree_type not in ["incoming", "outgoing", "bidirectional"]: + # raise ValueError( + # f"'degree_type' must be either incoming, " + # f"outgoing or bidirectional, got: {degree_type}" + # ) # Initialize dask client client = default_client() diff --git a/python/cugraph/cugraph/datasets/__init__.py b/python/cugraph/cugraph/datasets/__init__.py index ac18274d354..ecf10f3c4ef 100644 --- a/python/cugraph/cugraph/datasets/__init__.py +++ b/python/cugraph/cugraph/datasets/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -49,3 +49,4 @@ europe_osm = Dataset(meta_path / "europe_osm.yaml") # 1.5 GB hollywood = Dataset(meta_path / "hollywood.yaml") +amazon0302 = Dataset(meta_path / "amazon0302.yaml") diff --git a/python/cugraph/cugraph/datasets/dataset.py b/python/cugraph/cugraph/datasets/dataset.py index 15c30700fc3..63389cbc16a 100644 --- a/python/cugraph/cugraph/datasets/dataset.py +++ b/python/cugraph/cugraph/datasets/dataset.py @@ -352,7 +352,9 @@ def get_dask_graph( If True, stores the transpose of the adjacency matrix. Required for certain algorithms. """ - if self._edgelist is None: + if self._edgelist is None or not isinstance( + self._edgelist, dask_cudf.DataFrame + ): self.get_dask_edgelist(download=download) if create_using is None: diff --git a/python/cugraph/cugraph/datasets/metadata/amazon0302.yaml b/python/cugraph/cugraph/datasets/metadata/amazon0302.yaml new file mode 100644 index 00000000000..b02c936a06e --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/amazon0302.yaml @@ -0,0 +1,26 @@ +name: amazon0302 +file_type: .gz +description: + This network was collected by crawling the Amazon website. 
It is based on the + "Customers Who Bought This Item Also Bought" feature of the Amazon website. + If product i is frequently co-purchased with product j, the graph contains a + directed edge from i to j. The data was collected in March 02 2003. +author: J. Leskovec, L. Adamic and B. Adamic +refs: + J. Leskovec, L. Adamic and B. Adamic. The Dynamics of Viral Marketing. + ACM Transactions on the Web (ACM TWEB), 1(1), 2007. +delim: "\t" +header: 3 +col_names: + - FromNodeId + - ToNodeId +col_types: + - int32 + - int32 +has_loop: false +is_directed: true +is_multigraph: false +is_symmetric: false +number_of_edges: 1234877 +number_of_nodes: 262111 +url: https://snap.stanford.edu/data/amazon0302.txt.gz diff --git a/python/cugraph/cugraph/structure/graph_classes.py b/python/cugraph/cugraph/structure/graph_classes.py index 84234f7e904..90f809fa6c1 100644 --- a/python/cugraph/cugraph/structure/graph_classes.py +++ b/python/cugraph/cugraph/structure/graph_classes.py @@ -115,7 +115,6 @@ def from_cudf_edgelist( edge_type=None, renumber=True, store_transposed=False, - legacy_renum_only=False, symmetrize=None, ): """ @@ -168,13 +167,6 @@ def from_cudf_edgelist( If True, stores the transpose of the adjacency matrix. Required for certain algorithms. - legacy_renum_only : bool, optional (default=False) - If True, skips the C++ renumbering step. Must be true for - pylibcugraph algorithms. Must be false for algorithms - not yet converted to the pylibcugraph C API. - - This parameter is deprecated and will be removed. - symmetrize: bool, optional (default=None) If True, symmetrize the edge list for an undirected graph. Setting this flag to True for a directed graph returns an error. 
The default @@ -210,7 +202,6 @@ def from_cudf_edgelist( edge_type=edge_type, renumber=renumber, store_transposed=store_transposed, - legacy_renum_only=legacy_renum_only, symmetrize=symmetrize, ) @@ -306,7 +297,6 @@ def from_dask_cudf_edgelist( edge_type=None, renumber=True, store_transposed=False, - legacy_renum_only=False, ): """ Initializes the distributed graph from the dask_cudf.DataFrame @@ -353,13 +343,6 @@ def from_dask_cudf_edgelist( If True, stores the transpose of the adjacency matrix. Required for certain algorithms. - legacy_renum_only : bool, optional (default=False) - If True, skips the C++ renumbering step. Must be true for - pylibcugraph algorithms. Must be false for algorithms - not yet converted to the pylibcugraph C API. - - This parameter is deprecated and will be removed. - """ if self._Impl is None: @@ -378,7 +361,6 @@ def from_dask_cudf_edgelist( edge_type=edge_type, renumber=renumber, store_transposed=store_transposed, - legacy_renum_only=legacy_renum_only, ) # Move to Compat Module @@ -869,7 +851,6 @@ def from_cudf_edgelist( edge_attr=None, renumber=True, store_transposed=False, - legacy_renum_only=False, ): """ Initialize a graph from the edge list. It is an error to call this @@ -909,13 +890,6 @@ def from_cudf_edgelist( If True, stores the transpose of the adjacency matrix. Required for certain algorithms. - legacy_renum_only : bool, optional (default=False) - If True, skips the C++ renumbering step. Must be true for - pylibcugraph algorithms. Must be false for algorithms - not yet converted to the pylibcugraph C API. - - This parameter is deprecated and will be removed. 
- Examples -------- >>> df = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ', @@ -945,7 +919,6 @@ def from_dask_cudf_edgelist( edge_attr=None, renumber=True, store_transposed=False, - legacy_renum_only=False, ): """ Initializes the distributed graph from the dask_cudf.DataFrame @@ -980,12 +953,6 @@ def from_dask_cudf_edgelist( If True, stores the transpose of the adjacency matrix. Required for certain algorithms. - legacy_renum_only : bool, optional (default=False) - If True, skips the C++ renumbering step. Must be true for - pylibcugraph algorithms. Must be false for algorithms - not yet converted to the pylibcugraph C API. - - This parameter is deprecated and will be removed. """ raise TypeError("Distributed N-partite graph not supported") diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index 83dad234287..ced72a6bbe2 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -14,6 +14,7 @@ import gc from typing import Union, Iterable import warnings +from typing import Tuple import cudf import cupy as cp @@ -31,6 +32,7 @@ degrees as pylibcugraph_degrees, in_degrees as pylibcugraph_in_degrees, out_degrees as pylibcugraph_out_degrees, + decompress_to_edgelist as pylibcugraph_decompress_to_edgelist, ) from cugraph.structure.number_map import NumberMap @@ -172,7 +174,6 @@ def __from_edgelist( edge_type=None, renumber=True, store_transposed=False, - legacy_renum_only=False, symmetrize=None, ): if not isinstance(input_ddf, dask_cudf.DataFrame): @@ -333,9 +334,7 @@ def __from_edgelist( # the edgelist_df and not do any renumbering. 
# C++ renumbering is enabled by default for algorithms that # support it (but only called if renumbering is on) - self.compute_renumber_edge_list( - transposed=store_transposed, legacy_renum_only=legacy_renum_only - ) + self.compute_renumber_edge_list(transposed=store_transposed) if renumber is False: self.properties.renumbered = False @@ -979,6 +978,84 @@ def convert_to_cudf(cp_arrays): return ddf + def decompress_to_edgelist( + self, return_unrenumbered_edgelist: bool = True + ) -> dask_cudf.DataFrame: + """ + Extract the edgelist from a graph. + + Parameters + ---------- + return_unrenumbered_edgelist : bool (default=True) + Flag determining whether to return the original + input edgelist if 'True' or the renumbered one + if 'False' and the edgelist was renumbered. + + Returns + ------- + df : dask_cudf.cudf.DataFrame + Distributed GPU data frame containing all induced sources identifiers, + destination identifiers, and if applicable edge weights, edge ids and + edge types + """ + + # Initialize dask client + client = default_client() + + do_expensive_check = False + + def _call_decompress_to_edgelist( + sID: bytes, + mg_graph_x, + do_expensive_check: bool, + ) -> Tuple[cp.ndarray, cp.ndarray, cp.ndarray, cp.ndarray]: + return pylibcugraph_decompress_to_edgelist( + resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), + graph=mg_graph_x, + do_expensive_check=do_expensive_check, + ) + + result = [ + client.submit( + _call_decompress_to_edgelist, + Comms.get_session_id(), + self._plc_graph[w], + do_expensive_check, + ) + for w in Comms.get_workers() + ] + wait(result) + + def convert_to_cudf(cp_arrays: cp.ndarray) -> cudf.DataFrame: + cp_src, cp_dst, cp_weight, cp_edge_ids, cp_edge_type_ids = cp_arrays + + df = cudf.DataFrame() + df["src"] = cp_src + df["dst"] = cp_dst + if cp_weight is not None: + df["weight"] = cp_weight + if cp_edge_ids is not None: + df["edge_ids"] = cp_edge_ids + if cp_edge_type_ids is not None: + df["edge_type_ids"] = 
cp_edge_type_ids + + return df + + cudf_result = [ + client.submit(convert_to_cudf, cp_arrays) for cp_arrays in result + ] + + wait(cudf_result) + + ddf = dask_cudf.from_delayed(cudf_result).persist() + wait(ddf) + + if self.properties.renumbered and return_unrenumbered_edgelist: + ddf = self.renumber_map.unrenumber(ddf, "src") + ddf = self.renumber_map.unrenumber(ddf, "dst") + + return ddf + def select_random_vertices( self, random_state: int = None, num_vertices: int = None ) -> Union[dask_cudf.Series, dask_cudf.DataFrame]: @@ -1214,7 +1291,7 @@ def neighbors(self, n): ddf = self.edgelist.edgelist_df return ddf[ddf["src"] == n]["dst"].reset_index(drop=True) - def compute_renumber_edge_list(self, transposed=False, legacy_renum_only=False): + def compute_renumber_edge_list(self, transposed=False): """ Compute a renumbered edge list This function works in the MNMG pipeline and will transform @@ -1237,20 +1314,9 @@ def compute_renumber_edge_list(self, transposed=False, legacy_renum_only=False): structure. If False, renumber with the intent to make a CSR-like structure. Defaults to False. - legacy_renum_only : (optional) bool - if True, The C++ renumbering will not be triggered. - This parameter is added for new algos following the - C/Pylibcugraph path - This parameter is deprecated and will be removed. """ - if legacy_renum_only: - warning_msg = ( - "The parameter 'legacy_renum_only' is deprecated and will be removed." 
- ) - warnings.warn(warning_msg, DeprecationWarning) - if not self.properties.renumber: self.edgelist = self.EdgeList(self.input_df) self.renumber_map = None @@ -1269,7 +1335,6 @@ def compute_renumber_edge_list(self, transposed=False, legacy_renum_only=False): self.source_columns, self.destination_columns, store_transposed=transposed, - legacy_renum_only=legacy_renum_only, ) self.edgelist = self.EdgeList(renumbered_ddf) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 858b114ebdc..4523b7f13b8 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -14,6 +14,7 @@ from cugraph.structure import graph_primtypes_wrapper from cugraph.structure.replicate_edgelist import replicate_cudf_dataframe from cugraph.structure.symmetrize import symmetrize as symmetrize_df +from pylibcugraph import decompress_to_edgelist as pylibcugraph_decompress_to_edgelist from cugraph.structure.number_map import NumberMap import cugraph.dask.common.mg_utils as mg_utils import cudf @@ -132,17 +133,9 @@ def __from_edgelist( edge_id=None, edge_type=None, renumber=True, - legacy_renum_only=False, store_transposed=False, symmetrize=None, ): - if legacy_renum_only: - warning_msg = ( - "The parameter 'legacy_renum_only' is deprecated and will be removed." - ) - warnings.warn( - warning_msg, - ) if self.properties.directed and symmetrize: raise ValueError( @@ -266,11 +259,7 @@ def __from_edgelist( if renumber: # FIXME: Should SG do lazy evaluation like MG? 
elist, renumber_map = NumberMap.renumber( - elist, - source, - destination, - store_transposed=False, - legacy_renum_only=legacy_renum_only, + elist, source, destination, store_transposed=False ) source = renumber_map.renumbered_src_col_name destination = renumber_map.renumbered_dst_col_name @@ -312,6 +301,8 @@ def __from_edgelist( # FIXME: if the user calls self.edgelist.edgelist_df after creating a # symmetric graph, return the symmetric edgelist? + # FIXME: For better memory footprint, avoid storing this edgelist and instead + # call decompress_to_edgelist to extract the edgelist from the graph self.edgelist = simpleGraphImpl.EdgeList( elist[source], elist[destination], value_col ) @@ -804,6 +795,64 @@ def get_two_hop_neighbors(self, start_vertices=None): return df + def decompress_to_edgelist( + self, return_unrenumbered_edgelist: bool = True + ) -> cudf.DataFrame: + """ + Extract the edgelist from a graph. + + Parameters + ---------- + return_unrenumbered_edgelist : bool (default=True) + Flag determining whether to return the original input edgelist + if 'True' or the renumbered one if 'False' and the edgelist was + renumbered. 
+ + Returns + ------- + + df : cudf.DataFrame + GPU data frame containing all sources identifiers, + destination identifiers and if applicable edge weights, edge ids and + edge types + + Examples + -------- + >>> from cugraph.datasets import karate + >>> G = karate.get_graph(download=True) + >>> edgelist = G.decompress_to_edgelist() + + """ + + do_expensive_check = False + ( + source, + destination, + weight, + edge_ids, + edge_type_ids, + ) = pylibcugraph_decompress_to_edgelist( + resource_handle=ResourceHandle(), + graph=self._plc_graph, + do_expensive_check=do_expensive_check, + ) + + df = cudf.DataFrame() + df["src"] = source + df["dst"] = destination + if weight is not None: + df["weight"] = weight + if edge_ids is not None: + df["edge_ids"] = edge_ids + if edge_type_ids is not None: + df["edge_type_ids"] = edge_type_ids + + if self.properties.renumbered and return_unrenumbered_edgelist: + df, _ = self.renumber_map.unrenumber(df, "src", get_column_names=True) + df, _ = self.renumber_map.unrenumber(df, "dst", get_column_names=True) + + return df + def select_random_vertices( self, random_state: int = None, diff --git a/python/cugraph/cugraph/structure/number_map.py b/python/cugraph/cugraph/structure/number_map.py index b0118fee960..39738daff36 100644 --- a/python/cugraph/cugraph/structure/number_map.py +++ b/python/cugraph/cugraph/structure/number_map.py @@ -18,7 +18,6 @@ import dask_cudf import numpy as np import cudf -import warnings class NumberMap: @@ -462,12 +461,7 @@ def from_internal_vertex_id( @staticmethod def renumber_and_segment( - df, - src_col_names, - dst_col_names, - preserve_order=False, - store_transposed=False, - legacy_renum_only=False, + df, src_col_names, dst_col_names, preserve_order=False, store_transposed=False ): """ Given an input dataframe with its column names, this function returns the @@ -475,11 +469,6 @@ def renumber_and_segment( to external vertex IDs. 
the parameter 'preserve_order' ensures that the order of the edges is preserved during renumbering. """ - if legacy_renum_only: - warning_msg = ( - "The parameter 'legacy_renum_only' is deprecated and will be removed." - ) - warnings.warn(warning_msg, DeprecationWarning) renumbered = False @@ -584,20 +573,10 @@ def renumber_and_segment( @staticmethod def renumber( - df, - src_col_names, - dst_col_names, - preserve_order=False, - store_transposed=False, - legacy_renum_only=False, + df, src_col_names, dst_col_names, preserve_order=False, store_transposed=False ): return NumberMap.renumber_and_segment( - df, - src_col_names, - dst_col_names, - preserve_order, - store_transposed, - legacy_renum_only, + df, src_col_names, dst_col_names, preserve_order, store_transposed )[0:2] def unrenumber(self, df, column_name, preserve_order=False, get_column_names=False): diff --git a/python/cugraph/cugraph/testing/__init__.py b/python/cugraph/cugraph/testing/__init__.py index 2b4a4fd3ebf..5c89159bcff 100644 --- a/python/cugraph/cugraph/testing/__init__.py +++ b/python/cugraph/cugraph/testing/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -38,6 +38,7 @@ cit_patents, europe_osm, hollywood, + amazon0302, # twitter, ) @@ -71,4 +72,10 @@ toy_graph_undirected, ] DEFAULT_DATASETS = [dolphins, netscience, karate_disjoint] -BENCHMARKING_DATASETS = [soc_livejournal, cit_patents, europe_osm, hollywood] +BENCHMARKING_DATASETS = [ + soc_livejournal, + cit_patents, + europe_osm, + hollywood, + amazon0302, +] diff --git a/python/cugraph/cugraph/tests/core/test_core_number.py b/python/cugraph/cugraph/tests/core/test_core_number.py index a01b837ff61..b50e60ceb89 100644 --- a/python/cugraph/cugraph/tests/core/test_core_number.py +++ b/python/cugraph/cugraph/tests/core/test_core_number.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -32,11 +32,15 @@ def setup_function(): # ============================================================================= # Pytest fixtures # ============================================================================= -degree_type = ["incoming", "outgoing"] +# FIXME: degree_type is currently unsupported (ignored) +# degree_type = ["incoming", "outgoing"] +# fixture_params = gen_fixture_params_product( +# (UNDIRECTED_DATASETS, "graph_file"), +# (degree_type, "degree_type"), +# ) fixture_params = gen_fixture_params_product( (UNDIRECTED_DATASETS, "graph_file"), - (degree_type, "degree_type"), ) @@ -46,7 +50,9 @@ def input_combo(request): This fixture returns a dictionary containing all input params required to run a Core number algo """ - parameters = dict(zip(("graph_file", "degree_type"), request.param)) + # FIXME: degree_type is not supported so do not test with different values + # parameters = dict(zip(("graph_file", "degree_type"), request.param)) + parameters = {"graph_file": request.param[0]} 
graph_file = parameters["graph_file"] G = graph_file.get_graph() @@ -69,7 +75,8 @@ def input_combo(request): def test_core_number(input_combo): G = input_combo["G"] Gnx = input_combo["Gnx"] - degree_type = input_combo["degree_type"] + # FIXME: degree_type is currently unsupported (ignored) + # degree_type = input_combo["degree_type"] nx_core_number_results = cudf.DataFrame() dic_results = nx.core_number(Gnx) @@ -80,7 +87,7 @@ def test_core_number(input_combo): ) core_number_results = ( - cugraph.core_number(G, degree_type) + cugraph.core_number(G) .sort_values("vertex") .reset_index(drop=True) .rename(columns={"core_number": "cugraph_core_number"}) @@ -109,8 +116,3 @@ def test_core_number_invalid_input(input_combo): with pytest.raises(ValueError): cugraph.core_number(G) - - invalid_degree_type = "invalid" - G = input_combo["G"] - with pytest.raises(ValueError): - cugraph.core_number(G, invalid_degree_type) diff --git a/python/cugraph/cugraph/tests/core/test_core_number_mg.py b/python/cugraph/cugraph/tests/core/test_core_number_mg.py index 1138c1dc488..2c2c7e40a22 100644 --- a/python/cugraph/cugraph/tests/core/test_core_number_mg.py +++ b/python/cugraph/cugraph/tests/core/test_core_number_mg.py @@ -17,7 +17,7 @@ import cugraph import cugraph.dask as dcg -from cugraph.datasets import karate, dolphins, karate_asymmetric +from cugraph.datasets import karate, dolphins # ============================================================================= @@ -35,7 +35,8 @@ def setup_function(): DATASETS = [karate, dolphins] -DEGREE_TYPE = ["incoming", "outgoing", "bidirectional"] +# FIXME: degree_type is currently unsupported (ignored) +# DEGREE_TYPE = ["incoming", "outgoing", "bidirectional"] # ============================================================================= @@ -43,9 +44,9 @@ def setup_function(): # ============================================================================= -def get_sg_results(dataset, degree_type): +def get_sg_results(dataset): G = 
dataset.get_graph(create_using=cugraph.Graph(directed=False)) - res = cugraph.core_number(G, degree_type) + res = cugraph.core_number(G) res = res.sort_values("vertex").reset_index(drop=True) return res @@ -57,23 +58,23 @@ def get_sg_results(dataset, degree_type): @pytest.mark.mg @pytest.mark.parametrize("dataset", DATASETS) -@pytest.mark.parametrize("degree_type", DEGREE_TYPE) -def test_sg_core_number(dask_client, dataset, degree_type, benchmark): +# @pytest.mark.parametrize("degree_type", DEGREE_TYPE) +def test_sg_core_number(dask_client, dataset, benchmark): # This test is only for benchmark purposes. sg_core_number_results = None G = dataset.get_graph(create_using=cugraph.Graph(directed=False)) - sg_core_number_results = benchmark(cugraph.core_number, G, degree_type) + sg_core_number_results = benchmark(cugraph.core_number, G) assert sg_core_number_results is not None @pytest.mark.mg @pytest.mark.parametrize("dataset", DATASETS) -@pytest.mark.parametrize("degree_type", DEGREE_TYPE) -def test_core_number(dask_client, dataset, degree_type, benchmark): +# @pytest.mark.parametrize("degree_type", DEGREE_TYPE) +def test_core_number(dask_client, dataset, benchmark): dataset.get_dask_edgelist(download=True) # reload with MG edgelist dg = dataset.get_dask_graph(create_using=cugraph.Graph(directed=False)) - result_core_number = benchmark(dcg.core_number, dg, degree_type) + result_core_number = benchmark(dcg.core_number, dg) result_core_number = ( result_core_number.drop_duplicates() .compute() @@ -82,7 +83,7 @@ def test_core_number(dask_client, dataset, degree_type, benchmark): .rename(columns={"core_number": "mg_core_number"}) ) - expected_output = get_sg_results(dataset, degree_type) + expected_output = get_sg_results(dataset) # Update the mg core number with sg core number results # for easy comparison using cuDF DataFrame methods. 
@@ -90,13 +91,3 @@ def test_core_number(dask_client, dataset, degree_type, benchmark): counts_diffs = result_core_number.query("mg_core_number != sg_core_number") assert len(counts_diffs) == 0 - - -@pytest.mark.mg -def test_core_number_invalid_input(): - dg = karate_asymmetric.get_graph(create_using=cugraph.Graph(directed=True)) - - invalid_degree_type = 3 - - with pytest.raises(ValueError): - dcg.core_number(dg, invalid_degree_type) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py index c9fb73babb8..ed3a796121c 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py @@ -156,12 +156,10 @@ def networkx_call(M, benchmark_callable=None): # FIXME: This compare is shared across several tests... it should be # a general utility -def compare(src1, dst1, val1, src2, dst2, val2): - # +def assert_results_equal(src1, dst1, val1, src2, dst2, val2): # We will do comparison computations by using dataframe # merge functions (essentially doing fast joins). We # start by making two data frames - # df1 = cudf.DataFrame() df1["src1"] = src1 df1["dst1"] = dst1 @@ -174,19 +172,18 @@ def compare(src1, dst1, val1, src2, dst2, val2): if val2 is not None: df2["val2"] = val2 - # - # Check to see if all pairs in the original data frame - # still exist in the new data frame. If we join (merge) - # the data frames where (src1[i]=src2[i]) and (dst1[i]=dst2[i]) - # then we should get exactly the same number of entries in - # the data frame if we did not lose any data. - # + # Check to see if all pairs in df1 still exist in the new (merged) data + # frame. If we join (merge) the data frames where (src1[i]=src2[i]) and + # (dst1[i]=dst2[i]) then we should get exactly the same number of entries + # in the data frame if we did not lose any data. 
join = df1.merge(df2, left_on=["src1", "dst1"], right_on=["src2", "dst2"]) + # Print detailed differences on test failure if len(df1) != len(join): join2 = df1.merge( df2, how="left", left_on=["src1", "dst1"], right_on=["src2", "dst2"] ) + orig_option = pd.get_option("display.max_rows") pd.set_option("display.max_rows", 500) print("df1 = \n", df1.sort_values(["src1", "dst1"])) print("df2 = \n", df2.sort_values(["src2", "dst2"])) @@ -196,6 +193,7 @@ def compare(src1, dst1, val1, src2, dst2, val2): .to_pandas() .query("src2.isnull()", engine="python"), ) + pd.set_option("display.max_rows", orig_option) assert len(df1) == len(join) @@ -485,7 +483,7 @@ def test_all_pairs_jaccard_with_topk(): worst_coeff = all_pairs_jaccard_results["jaccard_coeff"].min() better_than_k = jaccard_results[jaccard_results["jaccard_coeff"] > worst_coeff] - compare( + assert_results_equal( all_pairs_jaccard_results["first"], all_pairs_jaccard_results["second"], all_pairs_jaccard_results["jaccard_coeff"], @@ -494,7 +492,7 @@ def test_all_pairs_jaccard_with_topk(): jaccard_results["jaccard_coeff"], ) - compare( + assert_results_equal( better_than_k["first"], better_than_k["second"], better_than_k["jaccard_coeff"], diff --git a/python/cugraph/cugraph/tests/sampling/test_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_random_walks.py index 9c94e036683..76ceb478518 100644 --- a/python/cugraph/cugraph/tests/sampling/test_random_walks.py +++ b/python/cugraph/cugraph/tests/sampling/test_random_walks.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION.: +# Copyright (c) 2020-2024, NVIDIA CORPORATION.: # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -222,7 +222,6 @@ def test_random_walks_invalid_max_dept(graph_file, directed, max_depth): @pytest.mark.sg -@pytest.mark.cugraph_ops @pytest.mark.parametrize("graph_file", SMALL_DATASETS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) def test_random_walks_coalesced(graph_file, directed): @@ -246,7 +245,6 @@ def test_random_walks_coalesced(graph_file, directed): @pytest.mark.sg -@pytest.mark.cugraph_ops @pytest.mark.parametrize("graph_file", SMALL_DATASETS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) def test_random_walks_padded_0(graph_file, directed): @@ -271,7 +269,6 @@ def test_random_walks_padded_0(graph_file, directed): @pytest.mark.sg -@pytest.mark.cugraph_ops def test_random_walks_padded_1(): max_depth = random.randint(2, 10) @@ -294,7 +291,6 @@ def test_random_walks_padded_1(): @pytest.mark.sg -@pytest.mark.cugraph_ops @pytest.mark.parametrize("graph_file", SMALL_DATASETS) def test_random_walks_nx(graph_file): G = graph_file.get_graph(create_using=cugraph.Graph(directed=True)) diff --git a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py index 34eeb2902f8..96b34c638b5 100644 --- a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py @@ -203,7 +203,6 @@ def input_graph(request): @pytest.mark.mg -@pytest.mark.cugraph_ops def test_dask_mg_random_walks(dask_client, input_graph): path_data, seeds, max_depth = calc_random_walks(input_graph) df_G = input_graph.input_df.compute().reset_index(drop=True) diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py index ad0dbe77f7d..65687a1a227 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py +++ 
b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py @@ -130,7 +130,6 @@ def simple_unweighted_input_expected_output(request): # Tests # ============================================================================= @pytest.mark.sg -@pytest.mark.cugraph_ops def test_uniform_neighbor_sample_simple(input_combo): G = input_combo["Graph"] @@ -229,7 +228,6 @@ def test_uniform_neighbor_sample_simple(input_combo): @pytest.mark.sg -@pytest.mark.cugraph_ops @pytest.mark.parametrize("directed", IS_DIRECTED) def test_uniform_neighbor_sample_tree(directed): @@ -301,7 +299,6 @@ def test_uniform_neighbor_sample_tree(directed): @pytest.mark.sg -@pytest.mark.cugraph_ops def test_uniform_neighbor_sample_unweighted(simple_unweighted_input_expected_output): test_data = simple_unweighted_input_expected_output @@ -322,7 +319,6 @@ def test_uniform_neighbor_sample_unweighted(simple_unweighted_input_expected_out @pytest.mark.sg -@pytest.mark.cugraph_ops @pytest.mark.parametrize("return_offsets", [True, False]) @pytest.mark.parametrize("include_hop_column", [True, False]) def test_uniform_neighbor_sample_edge_properties(return_offsets, include_hop_column): diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index 4a85b49a66e..6343b0ff9f3 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -131,7 +131,6 @@ def input_combo(request): # Tests # ============================================================================= @pytest.mark.mg -@pytest.mark.cugraph_ops def test_mg_uniform_neighbor_sample_simple(dask_client, input_combo): dg = input_combo["MGGraph"] @@ -220,7 +219,6 @@ def test_mg_uniform_neighbor_sample_simple(dask_client, input_combo): @pytest.mark.mg -@pytest.mark.cugraph_ops @pytest.mark.parametrize("directed", IS_DIRECTED) def 
test_mg_uniform_neighbor_sample_tree(dask_client, directed): @@ -286,7 +284,6 @@ def test_mg_uniform_neighbor_sample_tree(dask_client, directed): @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="FIXME: MG test fails on single-GPU") -@pytest.mark.cugraph_ops def test_mg_uniform_neighbor_sample_unweighted(dask_client): df = cudf.DataFrame( { @@ -321,7 +318,6 @@ def test_mg_uniform_neighbor_sample_unweighted(dask_client): @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="FIXME: MG test fails on single-GPU") -@pytest.mark.cugraph_ops def test_mg_uniform_neighbor_sample_ensure_no_duplicates(dask_client): # See issue #2760 # This ensures that the starts are properly distributed @@ -347,7 +343,6 @@ def test_mg_uniform_neighbor_sample_ensure_no_duplicates(dask_client): @pytest.mark.mg -@pytest.mark.cugraph_ops @pytest.mark.parametrize("return_offsets", [True, False]) def test_uniform_neighbor_sample_edge_properties(dask_client, return_offsets): n_workers = len(dask_client.scheduler_info()["workers"]) diff --git a/python/cugraph/cugraph/tests/structure/test_graph.py b/python/cugraph/cugraph/tests/structure/test_graph.py index b3e517100e1..6fcfef726b1 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph.py +++ b/python/cugraph/cugraph/tests/structure/test_graph.py @@ -179,6 +179,58 @@ def test_add_edge_list_to_adj_list(graph_file): assert values_cu is None +@pytest.mark.sg +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("is_directed", [True, False]) +@pytest.mark.parametrize("renumber", [True, False]) +def test_decompress_to_edgelist(graph_file, is_directed, renumber): + input_df = utils.read_csv_file(graph_file) + input_df = input_df.rename(columns={"0": "src", "1": "dst", "2": "weight"}) + + G = cugraph.Graph(directed=is_directed) + input_df_ = cudf.DataFrame() + if renumber: + input_df_["src_0"] = cudf.Series(input_df["src"]) + input_df_["dst_0"] = cudf.Series(input_df["dst"]) + input_df_["weight"] = 
cudf.Series(input_df["weight"]) + input_df_["src_1"] = input_df_["src_0"] + 1000 + input_df_["dst_1"] = input_df_["dst_0"] + 1000 + + input_df = input_df_ + source = ["src_0", "src_1"] + destination = ["dst_0", "dst_1"] + else: + source = "src" + destination = "dst" + + G.from_cudf_edgelist( + input_df, source=source, destination=destination, weight="weight", renumber=True + ) + + extracted_df = G.decompress_to_edgelist(return_unrenumbered_edgelist=True) + + if renumber: + extracted_df = extracted_df.rename( + columns={ + "0_src": "src_0", + "1_src": "src_1", + "0_dst": "dst_0", + "1_dst": "dst_1", + } + ) + extracted_df = extracted_df.sort_values( + ["src_0", "src_1", "dst_0", "dst_1"] + ).reset_index(drop=True) + input_df = input_df.sort_values( + ["src_0", "src_1", "dst_0", "dst_1"] + ).reset_index(drop=True) + else: + extracted_df = extracted_df.sort_values(["src", "dst"]).reset_index(drop=True) + input_df = input_df.sort_values(["src", "dst"]).reset_index(drop=True) + + assert_frame_equal(input_df, extracted_df, check_dtype=False, check_like=True) + + # Test @pytest.mark.sg @pytest.mark.parametrize("graph_file", utils.DATASETS) diff --git a/python/cugraph/cugraph/tests/structure/test_graph_mg.py b/python/cugraph/cugraph/tests/structure/test_graph_mg.py index f2cc1583f93..e5eeb0f653b 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph_mg.py +++ b/python/cugraph/cugraph/tests/structure/test_graph_mg.py @@ -420,3 +420,57 @@ def test_graph_creation_properties(dask_client, graph_file, directed, renumber): assert sG.number_of_nodes() == mG.number_of_nodes() assert sG.number_of_edges() == mG.number_of_edges() assert_frame_equal(sG_edgelist_view, mG_edgelist_view, check_dtype=False) + + +@pytest.mark.parametrize("directed", [True, False]) +@pytest.mark.parametrize("renumber", [True, False]) +@pytest.mark.parametrize("graph_file", datasets) +def test_decompress_to_edgelist(dask_client, graph_file, directed, renumber): + input_df = 
utils.read_csv_file(graph_file) + input_df = input_df.rename(columns={"0": "src", "1": "dst", "2": "weight"}) + + G = cugraph.Graph(directed=directed) + input_df_ = cudf.DataFrame() + if renumber: + input_df_["src_0"] = cudf.Series(input_df["src"]) + input_df_["dst_0"] = cudf.Series(input_df["dst"]) + input_df_["weight"] = cudf.Series(input_df["weight"]) + input_df_["src_1"] = input_df_["src_0"] + 1000 + input_df_["dst_1"] = input_df_["dst_0"] + 1000 + + input_df = input_df_ + source = ["src_0", "src_1"] + destination = ["dst_0", "dst_1"] + else: + source = "src" + destination = "dst" + num_workers = len(Comms.get_workers()) + + input_ddf = dask_cudf.from_cudf(input_df, npartitions=num_workers) + + G = cugraph.Graph(directed=True) + G.from_dask_cudf_edgelist( + input_ddf, source=source, destination=destination, weight="weight" + ) + + extracted_df = ( + G.decompress_to_edgelist(return_unrenumbered_edgelist=True) + .compute() + .reset_index(drop=True) + ) + + if renumber: + extracted_df = extracted_df.rename( + columns={ + "0_src": "src_0", + "1_src": "src_1", + "0_dst": "dst_0", + "1_dst": "dst_1", + } + ) + extracted_df = extracted_df.sort_values( + ["src_0", "src_1", "dst_0", "dst_1"] + ).reset_index(drop=True) + input_df = input_df.sort_values( + ["src_0", "src_1", "dst_0", "dst_1"] + ).reset_index(drop=True) diff --git a/python/cugraph/cugraph/tests/utils/test_dataset.py b/python/cugraph/cugraph/tests/utils/test_dataset.py index 3873cd1c3e4..9895eb61c82 100644 --- a/python/cugraph/cugraph/tests/utils/test_dataset.py +++ b/python/cugraph/cugraph/tests/utils/test_dataset.py @@ -104,6 +104,7 @@ def is_symmetric(dataset): return True else: df = dataset.get_edgelist(download=True) + df.rename(columns={df.columns[0]: "src", df.columns[1]: "dst"}, inplace=True) df_a = df.sort_values("src") # create df with swapped src/dst columns diff --git a/python/cugraph/cugraph/utilities/utils.py b/python/cugraph/cugraph/utilities/utils.py index 5bad68a79e2..0257da4ffc0 100644 --- 
a/python/cugraph/cugraph/utilities/utils.py +++ b/python/cugraph/cugraph/utilities/utils.py @@ -528,7 +528,7 @@ def create_list_series_from_2d_ar(ar, index): cp.arange(start=0, stop=len(data) + 1, step=n_cols), dtype="int32" ) mask_col = cp.full(shape=n_rows, fill_value=True) - mask = cudf._lib.transform.bools_to_mask(as_column(mask_col)) + mask = as_column(mask_col).as_mask() lc = cudf.core.column.ListColumn( data=None, size=n_rows, diff --git a/python/cugraph/pyproject.toml b/python/cugraph/pyproject.toml index d3960ab5d32..9a465d789e2 100644 --- a/python/cugraph/pyproject.toml +++ b/python/cugraph/pyproject.toml @@ -23,18 +23,18 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "cudf==24.12.*,>=0.0.0a0", + "cudf==25.2.*,>=0.0.0a0", "cupy-cuda11x>=12.0.0", - "dask-cuda==24.12.*,>=0.0.0a0", - "dask-cudf==24.12.*,>=0.0.0a0", + "dask-cuda==25.2.*,>=0.0.0a0", + "dask-cudf==25.2.*,>=0.0.0a0", "fsspec[http]>=0.6.0", "numba>=0.57", "numpy>=1.23,<3.0a0", - "pylibcugraph==24.12.*,>=0.0.0a0", - "raft-dask==24.12.*,>=0.0.0a0", - "rapids-dask-dependency==24.12.*,>=0.0.0a0", - "rmm==24.12.*,>=0.0.0a0", - "ucx-py==0.41.*,>=0.0.0a0", + "pylibcugraph==25.2.*,>=0.0.0a0", + "raft-dask==25.2.*,>=0.0.0a0", + "rapids-dask-dependency==25.2.*,>=0.0.0a0", + "rmm==25.2.*,>=0.0.0a0", + "ucx-py==0.42.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ "Intended Audience :: Developers", @@ -49,7 +49,7 @@ test = [ "networkx>=2.5.1", "numpy>=1.23,<3.0a0", "pandas", - "pylibwholegraph==24.12.*,>=0.0.0a0", + "pylibwholegraph==25.2.*,>=0.0.0a0", "pytest", "pytest-benchmark", "pytest-cov", @@ -82,9 +82,9 @@ build-backend = "scikit_build_core.build" requires = [ "cmake>=3.26.4,!=3.30.0", "ninja", - "pylibcugraph==24.12.*,>=0.0.0a0", - "pylibraft==24.12.*,>=0.0.0a0", - "rmm==24.12.*,>=0.0.0a0", + "pylibcugraph==25.2.*,>=0.0.0a0", + "pylibraft==25.2.*,>=0.0.0a0", + "rmm==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true" diff --git a/python/cugraph/pytest.ini b/python/cugraph/pytest.ini index bf6e6bdd802..7bdef2d9771 100644 --- a/python/cugraph/pytest.ini +++ b/python/cugraph/pytest.ini @@ -62,6 +62,7 @@ filterwarnings = error::FutureWarning error::DeprecationWarning # TODO + ignore:.*cuda..* module is deprecated.*:DeprecationWarning ignore:Multi is deprecated and the removal of multi edges will no longer be supported:FutureWarning ignore:The legacy column names:FutureWarning ignore:The include_hop_column flag is deprecated and will be removed:FutureWarning diff --git a/python/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/CMakeLists.txt index 045628e9c0d..22d788ff253 100644 --- a/python/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/CMakeLists.txt @@ -32,14 +32,8 @@ project( option(FIND_CUGRAPH_CPP "Search for existing CUGRAPH C++ installations before defaulting to local files" OFF ) -option(USE_CUGRAPH_OPS "Enable all functions that call cugraph-ops" ON) option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF) -if(NOT USE_CUGRAPH_OPS) - message(STATUS "Disabling libcugraph functions that reference cugraph-ops") - add_compile_definitions(NO_CUGRAPH_OPS) 
-endif() - # If the user requested it we attempt to find CUGRAPH. if(FIND_CUGRAPH_CPP) find_package(cugraph "${RAPIDS_VERSION}" REQUIRED) @@ -54,14 +48,10 @@ if (NOT cugraph_FOUND) set(BUILD_TESTS OFF) set(BUILD_CUGRAPH_MG_TESTS OFF) - set(BUILD_CUGRAPH_OPS_CPP_TESTS OFF) set(CUDA_STATIC_RUNTIME ON) set(CUDA_STATIC_MATH_LIBRARIES ON) set(USE_RAFT_STATIC ON) set(CUGRAPH_COMPILE_RAFT_LIB ON) - set(CUGRAPH_USE_CUGRAPH_OPS_STATIC ON) - set(CUGRAPH_EXCLUDE_CUGRAPH_OPS_FROM_ALL ON) - set(ALLOW_CLONE_CUGRAPH_OPS ON) if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0) set(CUDA_STATIC_MATH_LIBRARIES OFF) diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt index fb46030bc56..fe7c4b64aa5 100644 --- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt @@ -70,6 +70,7 @@ set(cython_sources homogeneous_biased_neighbor_sample.pyx homogeneous_uniform_neighbor_sample.pyx edge_id_lookup_table.pyx + decompress_to_edgelist.pyx ) set(linked_libraries cugraph::cugraph;cugraph::cugraph_c) diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py index 5aa351f9ce1..9047144c13a 100644 --- a/python/pylibcugraph/pylibcugraph/__init__.py +++ b/python/pylibcugraph/pylibcugraph/__init__.py @@ -126,6 +126,8 @@ from pylibcugraph.degrees import in_degrees, out_degrees, degrees +from pylibcugraph.decompress_to_edgelist import decompress_to_edgelist + from pylibcugraph import exceptions diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd index 21f5190ad5f..38781614b20 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd @@ -245,6 +245,11 @@ cdef extern from "cugraph_c/algorithms.h": const cugraph_sample_result_t* result ) + cdef cugraph_type_erased_device_array_view_t* \ + 
cugraph_sample_result_get_label_type_hop_offsets( + const cugraph_sample_result_t* result + ) + cdef cugraph_type_erased_device_array_view_t* \ cugraph_sample_result_get_start_labels( const cugraph_sample_result_t* result diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd index b8f16cb94c8..b27a7230a13 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd @@ -122,41 +122,41 @@ cdef extern from "cugraph_c/graph_functions.h": ########################################################################### # induced_subgraph - ctypedef struct cugraph_induced_subgraph_result_t: + ctypedef struct cugraph_induced_subgraph_result_t: # Deprecated pass cdef cugraph_type_erased_device_array_view_t* \ - cugraph_induced_subgraph_get_sources( + cugraph_induced_subgraph_get_sources( # Deprecated cugraph_induced_subgraph_result_t* induced_subgraph ) cdef cugraph_type_erased_device_array_view_t* \ - cugraph_induced_subgraph_get_destinations( + cugraph_induced_subgraph_get_destinations( # Deprecated cugraph_induced_subgraph_result_t* induced_subgraph ) cdef cugraph_type_erased_device_array_view_t* \ - cugraph_induced_subgraph_get_edge_weights( + cugraph_induced_subgraph_get_edge_weights( # Deprecated cugraph_induced_subgraph_result_t* induced_subgraph ) cdef cugraph_type_erased_device_array_view_t* \ - cugraph_induced_subgraph_get_edge_ids( + cugraph_induced_subgraph_get_edge_ids( # Deprecated cugraph_induced_subgraph_result_t* induced_subgraph ) cdef cugraph_type_erased_device_array_view_t* \ - cugraph_induced_subgraph_get_edge_type_ids( + cugraph_induced_subgraph_get_edge_type_ids( # Deprecated cugraph_induced_subgraph_result_t* induced_subgraph ) cdef cugraph_type_erased_device_array_view_t* \ - cugraph_induced_subgraph_get_subgraph_offsets( + cugraph_induced_subgraph_get_subgraph_offsets( # Deprecated 
cugraph_induced_subgraph_result_t* induced_subgraph ) cdef void \ - cugraph_induced_subgraph_result_free( + cugraph_induced_subgraph_result_free( # Deprecated cugraph_induced_subgraph_result_t* induced_subgraph ) @@ -250,3 +250,52 @@ cdef extern from "cugraph_c/graph_functions.h": cugraph_degrees_result_free( cugraph_degrees_result_t* degrees_result ) + + ########################################################################### + # decompress to edgelist + ctypedef struct cugraph_edgelist_t: + pass + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_edgelist_get_sources( + cugraph_edgelist_t* edgelist + ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_edgelist_get_destinations( + cugraph_edgelist_t* edgelist + ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_edgelist_get_edge_weights( + cugraph_edgelist_t* edgelist + ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_edgelist_get_edge_ids( + cugraph_edgelist_t* edgelist + ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_edgelist_get_edge_type_ids( + cugraph_edgelist_t* edgelist + ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_edgelist_get_edge_offsets( + cugraph_edgelist_t* edgelist + ) + + cdef void \ + cugraph_edgelist_free( + cugraph_edgelist_t* edgelist + ) + + cdef cugraph_error_code_t \ + cugraph_decompress_to_edgelist( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + bool_t do_expensive_check, + cugraph_edgelist_t** result, + cugraph_error_t** error + ) diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd index 762fd37a35d..f496cc7d880 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd @@ -73,6 +73,7 @@ cdef extern from "cugraph_c/sampling_algorithms.h": cugraph_graph_t* graph, const 
cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_tyoe_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -88,6 +89,7 @@ cdef extern from "cugraph_c/sampling_algorithms.h": const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_tyoe_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, diff --git a/python/pylibcugraph/pylibcugraph/core_number.pyx b/python/pylibcugraph/pylibcugraph/core_number.pyx index e754ef2c65e..48d9c5de429 100644 --- a/python/pylibcugraph/pylibcugraph/core_number.pyx +++ b/python/pylibcugraph/pylibcugraph/core_number.pyx @@ -66,14 +66,14 @@ def core_number(ResourceHandle resource_handle, referencing data and running algorithms. graph : SGGraph or MGGraph - The input graph, for either Single or Multi-GPU operations. + The input graph, for either single or multi-GPU operations. The input + graph must be symmetric (the is_symmetric property must be True). degree_type: str - This option determines if the core number computation should be based - on input, output, or both directed edges, with valid values being - "incoming", "outgoing", and "bidirectional" respectively. - This option is currently ignored in this release, and setting it will - result in a warning. + This option is currently ignored. This option may eventually determine + if the core number computation should be based on input, output, or + both directed edges, with valid values being "incoming", "outgoing", + and "bidirectional" respectively. 
do_expensive_check: bool If True, performs more extensive tests on the inputs to ensure @@ -98,14 +98,14 @@ def core_number(ResourceHandle resource_handle, cdef cugraph_error_code_t error_code cdef cugraph_error_t* error_ptr - degree_type_map = { - "incoming": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_IN, - "outgoing": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_OUT, - "bidirectional": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_INOUT} - + # When supported, degree_type string should be mapped to constant like so: + # degree_type_map = { + # "incoming": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_IN, + # "outgoing": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_OUT, + # "bidirectional": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_INOUT} error_code = cugraph_core_number(c_resource_handle_ptr, c_graph_ptr, - degree_type_map[degree_type], + cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_IN, do_expensive_check, &result_ptr, &error_ptr) diff --git a/python/pylibcugraph/pylibcugraph/decompress_to_edgelist.pyx b/python/pylibcugraph/pylibcugraph/decompress_to_edgelist.pyx new file mode 100644 index 00000000000..58c29940aba --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/decompress_to_edgelist.pyx @@ -0,0 +1,169 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Have cython use python 3 syntax +# cython: language_level = 3 + + +from pylibcugraph._cugraph_c.types cimport ( + bool_t, +) +from pylibcugraph._cugraph_c.resource_handle cimport ( + cugraph_resource_handle_t, +) +from pylibcugraph._cugraph_c.error cimport ( + cugraph_error_code_t, + cugraph_error_t, +) +from pylibcugraph._cugraph_c.array cimport ( + cugraph_type_erased_device_array_view_t, +) +from pylibcugraph._cugraph_c.graph cimport ( + cugraph_graph_t, +) +from pylibcugraph._cugraph_c.graph_functions cimport ( + cugraph_edgelist_t, + cugraph_decompress_to_edgelist, + cugraph_edgelist_get_sources, + cugraph_edgelist_get_destinations, + cugraph_edgelist_get_edge_weights, + cugraph_edgelist_get_edge_ids, + cugraph_edgelist_get_edge_type_ids, + cugraph_edgelist_get_edge_offsets, + cugraph_edgelist_free, +) + +from pylibcugraph.resource_handle cimport ( + ResourceHandle, +) +from pylibcugraph.graphs cimport ( + _GPUGraph, +) +from pylibcugraph.utils cimport ( + assert_success, + copy_to_cupy_array, + create_cugraph_type_erased_device_array_view_from_py_obj, +) + + +def decompress_to_edgelist(ResourceHandle resource_handle, + _GPUGraph graph, + bool_t do_expensive_check): + """ + Extract the edgelist from a graph + + Parameters + ---------- + resource_handle : ResourceHandle + Handle to the underlying device resources needed for referencing data + and running algorithms. + + graph : SGGraph or MGGraph + The input graph. + + do_expensive_check : bool_t + If True, performs more extensive tests on the inputs to ensure + validity, at the expense of increased run time. + + Returns + ------- + A tuple of device arrays containing the sources, destinations and if applicable + edge_weights, edge_ids and/or edge_type_ids. + + Examples + -------- + >>> import pylibcugraph, cupy, numpy + >>> srcs = cupy.asarray([0, 1, 1, 2, 2, 2, 3, 4], dtype=numpy.int32) + >>> dsts = cupy.asarray([1, 3, 4, 0, 1, 3, 5, 5], dtype=numpy.int32) + >>> weights = cupy.asarray( + ...
[0.1, 2.1, 1.1, 5.1, 3.1, 4.1, 7.2, 3.2], dtype=numpy.float32) + >>> resource_handle = pylibcugraph.ResourceHandle() + >>> graph_props = pylibcugraph.GraphProperties( + ... is_symmetric=False, is_multigraph=False) + >>> G = pylibcugraph.SGGraph( + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, + ... store_transposed=False, renumber=False, do_expensive_check=False) + >>> (sources, destinations, edge_weights, _, _) = ( + ... pylibcugraph.decompress_to_edgelist( + ... resource_handle, G, False)) + >>> sources + [0, 1, 1, 2, 2, 2, 3, 4] + >>> destinations + [1, 3, 4, 0, 1, 3, 5, 5] + >>> edge_weights + [0.1, 2.1, 1.1, 5.1, 3.1, 4.1, 7.2, 3.2] + """ + + cdef cugraph_resource_handle_t* c_resource_handle_ptr = \ + resource_handle.c_resource_handle_ptr + cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr + cdef cugraph_edgelist_t* result_ptr + cdef cugraph_error_code_t error_code + cdef cugraph_error_t* error_ptr + + error_code = cugraph_decompress_to_edgelist(c_resource_handle_ptr, + c_graph_ptr, + do_expensive_check, + &result_ptr, + &error_ptr) + assert_success(error_code, error_ptr, "cugraph_decompress_to_edgelist") + + # Extract individual device array pointers from result and copy to cupy + # arrays for returning.
+ + cdef cugraph_type_erased_device_array_view_t* sources_ptr = \ + cugraph_edgelist_get_sources(result_ptr) + cdef cugraph_type_erased_device_array_view_t* destinations_ptr = \ + cugraph_edgelist_get_destinations(result_ptr) + cdef cugraph_type_erased_device_array_view_t* edge_weights_ptr = \ + cugraph_edgelist_get_edge_weights(result_ptr) + + cdef cugraph_type_erased_device_array_view_t* edge_ids_ptr = \ + cugraph_edgelist_get_edge_ids(result_ptr) + cdef cugraph_type_erased_device_array_view_t* edge_type_ids_ptr = \ + cugraph_edgelist_get_edge_type_ids(result_ptr) + + + """ + cdef cugraph_type_erased_device_array_view_t* subgraph_offsets_ptr = \ + cugraph_edgelist_get_edge_offsets(result_ptr) + """ + + # FIXME: Get ownership of the result data instead of performing a copy + # for performance improvement + cupy_edge_weights = None + cupy_edge_ids = None + cupy_edge_type_ids = None + cupy_sources = copy_to_cupy_array( + c_resource_handle_ptr, sources_ptr) + cupy_destinations = copy_to_cupy_array( + c_resource_handle_ptr, destinations_ptr) + if edge_weights_ptr != NULL: + cupy_edge_weights = copy_to_cupy_array( + c_resource_handle_ptr, edge_weights_ptr) + if edge_ids_ptr != NULL: + cupy_edge_ids = copy_to_cupy_array( + c_resource_handle_ptr, edge_ids_ptr) + if edge_type_ids_ptr != NULL: + cupy_edge_type_ids = copy_to_cupy_array( + c_resource_handle_ptr, edge_type_ids_ptr) + + """ + cupy_subgraph_offsets = copy_to_cupy_array( + c_resource_handle_ptr, subgraph_offsets_ptr) + """ + + # Free pointer + cugraph_edgelist_free(result_ptr) + + return (cupy_sources, cupy_destinations, + cupy_edge_weights, cupy_edge_ids, cupy_edge_type_ids) diff --git a/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx index ecdfba3afc5..ee0e85fa3bd 100644 --- a/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx +++
b/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx @@ -87,6 +87,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, _GPUGraph input_graph, start_vertex_list, starting_vertex_label_offsets, + vertex_type_offsets, h_fan_out, num_edge_types, bool_t with_replacement, @@ -124,6 +125,9 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, 'starting_vertex_label_offsets' must lead to an array of len(start_vertex_list) + vertex_type_offsets: device array type (Optional) + Offsets for each vertex type in the graph. + h_fan_out: numpy array type Device array containing the branching out (fan-out) degrees per starting vertex for each hop level. The fanout value at each hop for each @@ -247,6 +251,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, assert_CAI_type(start_vertex_list, "start_vertex_list") assert_CAI_type(starting_vertex_label_offsets, "starting_vertex_label_offsets", True) + assert_CAI_type(vertex_type_offsets, "vertex_type_offsets", True) assert_AI_type(h_fan_out, "h_fan_out") @@ -277,6 +282,11 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, cai_starting_vertex_label_offsets_ptr = \ starting_vertex_label_offsets.__cuda_array_interface__['data'][0] + cdef uintptr_t cai_vertex_type_offsets_ptr + if vertex_type_offsets is not None: + cai_vertex_type_offsets_ptr = \ + vertex_type_offsets.__cuda_array_interface__['data'][0] + cdef cugraph_type_erased_device_array_view_t* start_vertex_list_ptr = \ cugraph_type_erased_device_array_view_create( @@ -294,6 +304,16 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, SIZE_T ) + cdef cugraph_type_erased_device_array_view_t* vertex_type_offsets_ptr = NULL + if vertex_type_offsets is not None: + vertex_type_offsets_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_vertex_type_offsets_ptr, + len(vertex_type_offsets), + SIZE_T + ) + + cdef 
cugraph_type_erased_device_array_view_t* label_offsets_ptr = NULL if retain_seeds: if starting_vertex_label_offsets is None: @@ -354,6 +374,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, NULL, # FIXME: Add support for biased neighbor sampling start_vertex_list_ptr, starting_vertex_label_offsets_ptr, + vertex_type_offsets_ptr, fan_out_ptr, num_edge_types, sampling_options, @@ -388,7 +409,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, cupy_edge_types = result.get_edge_types() cupy_batch_ids = result.get_batch_ids() cupy_label_hop_offsets = result.get_label_hop_offsets() - + cupy_label_type_hop_offsets = result.get_label_type_hop_offsets() if renumber: cupy_renumber_map = result.get_renumber_map() @@ -405,6 +426,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, 'edge_type': cupy_edge_types, 'batch_id': cupy_batch_ids, 'label_hop_offsets': cupy_label_hop_offsets, + 'label_type_hop_offsets': cupy_label_type_hop_offsets, 'hop_id': None, 'renumber_map': cupy_renumber_map, 'renumber_map_offsets': cupy_renumber_map_offsets, @@ -422,6 +444,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, 'edge_type': cupy_edge_types, 'batch_id': cupy_batch_ids, 'label_hop_offsets': cupy_label_hop_offsets, + 'label_type_hop_offsets': cupy_label_type_hop_offsets, } # Return everything that isn't null diff --git a/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx index 3fa3575e27d..dbee65323d7 100644 --- a/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx @@ -84,6 +84,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, _GPUGraph input_graph, start_vertex_list, starting_vertex_label_offsets, + vertex_type_offsets, h_fan_out, num_edge_types, bool_t 
with_replacement, @@ -119,6 +120,9 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, 'starting_vertex_label_offsets' must lead to an array of len(start_vertex_list) + vertex_type_offsets: device array type (Optional) + Offsets for each vertex type in the graph. + h_fan_out: numpy array type Device array containing the branching out (fan-out) degrees per starting vertex for each hop level. The fanout value at each hop for each @@ -242,6 +246,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, assert_CAI_type(start_vertex_list, "start_vertex_list") assert_CAI_type(starting_vertex_label_offsets, "starting_vertex_label_offsets", True) + assert_CAI_type(vertex_type_offsets, "vertex_type_offsets", True) assert_AI_type(h_fan_out, "h_fan_out") @@ -271,6 +276,11 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, cai_starting_vertex_label_offsets_ptr = \ starting_vertex_label_offsets.__cuda_array_interface__['data'][0] + cdef uintptr_t cai_vertex_type_offsets_ptr + if vertex_type_offsets is not None: + cai_vertex_type_offsets_ptr = \ + vertex_type_offsets.__cuda_array_interface__['data'][0] + cdef cugraph_type_erased_device_array_view_t* start_vertex_list_ptr = \ cugraph_type_erased_device_array_view_create( @@ -288,6 +298,15 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, SIZE_T ) + cdef cugraph_type_erased_device_array_view_t* vertex_type_offsets_ptr = NULL + if vertex_type_offsets is not None: + vertex_type_offsets_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_vertex_type_offsets_ptr, + len(vertex_type_offsets), + SIZE_T + ) + cdef cugraph_type_erased_device_array_view_t* label_offsets_ptr = NULL if retain_seeds: if starting_vertex_label_offsets is None: @@ -347,6 +366,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, c_graph_ptr, start_vertex_list_ptr, starting_vertex_label_offsets_ptr, + vertex_type_offsets_ptr, 
fan_out_ptr, num_edge_types, sampling_options, @@ -372,6 +392,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, # Get cupy "views" of the individual arrays to return. These each increment # the refcount on the SamplingResult instance which will keep the data alive # until all references are removed and the GC runs. + cupy_majors = result.get_majors() cupy_major_offsets = result.get_major_offsets() cupy_minors = result.get_minors() @@ -380,6 +401,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, cupy_edge_types = result.get_edge_types() cupy_batch_ids = result.get_batch_ids() cupy_label_hop_offsets = result.get_label_hop_offsets() + cupy_label_type_hop_offsets = result.get_label_type_hop_offsets() if renumber: cupy_renumber_map = result.get_renumber_map() @@ -396,6 +418,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, 'edge_type': cupy_edge_types, 'batch_id': cupy_batch_ids, 'label_hop_offsets': cupy_label_hop_offsets, + 'label_type_hop_offsets': cupy_label_type_hop_offsets, 'hop_id': None, 'renumber_map': cupy_renumber_map, 'renumber_map_offsets': cupy_renumber_map_offsets, @@ -413,6 +436,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, 'edge_type': cupy_edge_types, 'batch_id': cupy_batch_ids, 'label_hop_offsets': cupy_label_hop_offsets, + 'label_type_hop_offsets': cupy_label_type_hop_offsets, } # Return everything that isn't null diff --git a/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx index e2476de1607..cbd7a5dcffb 100644 --- a/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx @@ -216,7 +216,7 @@ def homogeneous_biased_neighbor_sample(ResourceHandle resource_handle, >>> sampling_results = pylibcugraph.homogeneous_biased_neighbor_sample( ... 
resource_handle, G, start_vertices, starting_vertex_label_offsets, ... h_fan_out, False, True) - >>> >>> sampling_results + >>> sampling_results {'majors': array([2, 2, 5, 5, 1, 1], dtype=int32), 'minors': array([1, 3, 3, 4, 3, 4], dtype=int32), 'weight': array([3.1, 4.1, 7.2, 3.2, 2.1, 1.1], dtype=float32)} @@ -383,8 +383,6 @@ def homogeneous_biased_neighbor_sample(ResourceHandle resource_handle, if renumber: cupy_renumber_map = result.get_renumber_map() cupy_renumber_map_offsets = result.get_renumber_map_offsets() - cupy_edge_renumber_map = result.get_edge_renumber_map() - cupy_edge_renumber_map_offsets = result.get_edge_renumber_map_offsets() sampling_results = { 'major_offsets': cupy_major_offsets, @@ -397,9 +395,7 @@ def homogeneous_biased_neighbor_sample(ResourceHandle resource_handle, 'label_hop_offsets': cupy_label_hop_offsets, 'hop_id': None, 'renumber_map': cupy_renumber_map, - 'renumber_map_offsets': cupy_renumber_map_offsets, - 'edge_renumber_map' : cupy_edge_renumber_map, - 'edge_renumber_map_offsets' : cupy_edge_renumber_map_offsets + 'renumber_map_offsets': cupy_renumber_map_offsets } else: diff --git a/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx index 3c6cdf77420..bb88ffcf6af 100644 --- a/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx @@ -211,7 +211,7 @@ def homogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, >>> sampling_results = pylibcugraph.homogeneous_uniform_neighbor_sample( ... resource_handle, G, start_vertices, starting_vertex_label_offsets, ... 
h_fan_out, False, True) - >>> >>> sampling_results + >>> sampling_results {'majors': array([2, 2, 5, 5, 1, 1], dtype=int32), 'minors': array([1, 3, 3, 4, 3, 4], dtype=int32), 'weight': array([3.1, 4.1, 7.2, 3.2, 2.1, 1.1], dtype=float32)} @@ -378,8 +378,6 @@ def homogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, if renumber: cupy_renumber_map = result.get_renumber_map() cupy_renumber_map_offsets = result.get_renumber_map_offsets() - cupy_edge_renumber_map = result.get_edge_renumber_map() - cupy_edge_renumber_map_offsets = result.get_edge_renumber_map_offsets() sampling_results = { 'major_offsets': cupy_major_offsets, @@ -392,9 +390,7 @@ def homogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, 'label_hop_offsets': cupy_label_hop_offsets, 'hop_id': None, 'renumber_map': cupy_renumber_map, - 'renumber_map_offsets': cupy_renumber_map_offsets, - 'edge_renumber_map' : cupy_edge_renumber_map, - 'edge_renumber_map_offsets' : cupy_edge_renumber_map_offsets + 'renumber_map_offsets': cupy_renumber_map_offsets } else: diff --git a/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx b/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx index b93618d73ce..a2ea7cb9716 100644 --- a/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx +++ b/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx @@ -24,6 +24,7 @@ from pylibcugraph._cugraph_c.algorithms cimport ( cugraph_sample_result_get_majors, cugraph_sample_result_get_minors, cugraph_sample_result_get_label_hop_offsets, + cugraph_sample_result_get_label_type_hop_offsets, cugraph_sample_result_get_sources, # deprecated cugraph_sample_result_get_destinations, # deprecated cugraph_sample_result_get_edge_weight, @@ -206,6 +207,19 @@ cdef class SamplingResult: return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) + def get_label_type_hop_offsets(self): + if self.c_sample_result_ptr is NULL: + raise ValueError("pointer not 
set, must call set_ptr() with a " + "non-NULL value first.") + cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = ( + cugraph_sample_result_get_label_type_hop_offsets(self.c_sample_result_ptr) + ) + if device_array_view_ptr is NULL: + return None + + return create_cupy_array_view_for_device_ptr(device_array_view_ptr, + self) + # Deprecated def get_offsets(self): if self.c_sample_result_ptr is NULL: diff --git a/python/pylibcugraph/pylibcugraph/tests/test_uniform_neighbor_sample.py b/python/pylibcugraph/pylibcugraph/tests/test_uniform_neighbor_sample.py index ffa90731483..4dafeb19032 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_uniform_neighbor_sample.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_uniform_neighbor_sample.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -183,7 +183,6 @@ def test_neighborhood_sampling_cudf( ) -@pytest.mark.cugraph_ops def test_neighborhood_sampling_large_sg_graph(gpubenchmark): """ Use a large SG graph and set input args accordingly to test/benchmark diff --git a/python/pylibcugraph/pyproject.toml b/python/pylibcugraph/pyproject.toml index 8d22f50cc93..72a5e19c702 100644 --- a/python/pylibcugraph/pyproject.toml +++ b/python/pylibcugraph/pyproject.toml @@ -27,8 +27,8 @@ dependencies = [ "nvidia-curand", "nvidia-cusolver", "nvidia-cusparse", - "pylibraft==24.12.*,>=0.0.0a0", - "rmm==24.12.*,>=0.0.0a0", + "pylibraft==25.2.*,>=0.0.0a0", + "rmm==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ "Intended Audience :: Developers", @@ -40,7 +40,7 @@ classifiers = [ [project.optional-dependencies] test = [ - "cudf==24.12.*,>=0.0.0a0", + "cudf==25.2.*,>=0.0.0a0", "numpy>=1.23,<3.0a0", "pandas", "pytest", @@ -74,8 +74,8 @@ dependencies-file = "../../dependencies.yaml" requires = [ "cmake>=3.26.4,!=3.30.0", "ninja", - "pylibraft==24.12.*,>=0.0.0a0", - "rmm==24.12.*,>=0.0.0a0", + "pylibraft==25.2.*,>=0.0.0a0", + "rmm==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" diff --git a/python/pylibcugraph/pytest.ini b/python/pylibcugraph/pytest.ini index d5ade9f4836..8ca4e198441 100644 --- a/python/pylibcugraph/pytest.ini +++ b/python/pylibcugraph/pytest.ini @@ -12,7 +12,4 @@ # limitations under the License. [pytest] -markers = - cugraph_ops: Tests requiring cugraph-ops - addopts = --tb=native diff --git a/readme_pages/CONTRIBUTING.md b/readme_pages/CONTRIBUTING.md index ffe1ef1831b..01d5f263624 100644 --- a/readme_pages/CONTRIBUTING.md +++ b/readme_pages/CONTRIBUTING.md @@ -1,5 +1,5 @@ # Contributing to cuGraph -cuGraph, for the most part, is an open-source project where we encourage community involvement. The cugraph-ops package is the expection being a closed-source package. +cuGraph, for the most part, is an open-source project where we encourage community involvement. There are multiple ways to be involved and contribute to the cuGraph community, the top paths are listed below: diff --git a/readme_pages/cugraph_dgl.md b/readme_pages/cugraph_dgl.md deleted file mode 100644 index 7b19787f4c6..00000000000 --- a/readme_pages/cugraph_dgl.md +++ /dev/null @@ -1,27 +0,0 @@ -# cugraph_dgl - -[RAPIDS](https://rapids.ai) cugraph_dgl enables the ability to use cugraph Property Graphs with DGL. 
This cugraph backend allows DGL users access to a collection of GPU-accelerated algorithms for graph analytics, such as sampling, centrality computation, and community detection. - - -The goal of `cugraph_dgl` is to enable Multi-Node Multi-GPU cugraph accelerated graphs to help train large-scale Graph Neural Networks(GNN) on DGL by providing a duck-typed version of the [DGLGraph](https://docs.dgl.ai/api/python/dgl.DGLGraph.html#dgl.DGLGraph) which uses cugraph for storing graph structure and node/edge feature data. - -## Usage -```diff - -+from cugraph_dgl.convert import cugraph_storage_from_heterograph -+cugraph_g = cugraph_storage_from_heterograph(dgl_g) - -sampler = dgl.dataloading.NeighborSampler( - [15, 10, 5], prefetch_node_feats=['feat'], prefetch_labels=['label']) - -train_dataloader = dgl.dataloading.DataLoader( -- dgl_g, -+ cugraph_g, -train_idx, -sampler, -device=device, -batch_size=1024, -shuffle=True, -drop_last=False, -num_workers=0) -``` diff --git a/readme_pages/cugraph_ops.md b/readme_pages/cugraph_ops.md deleted file mode 100644 index 7bd4ac55185..00000000000 --- a/readme_pages/cugraph_ops.md +++ /dev/null @@ -1,17 +0,0 @@ -

-
- cuGraph -

-

-CuGraphOps -

-Cugraph-ops is a closed-source library that is composed of highly optimized and -performant primitives associated with GNNs and related graph -operations, such as training, sampling and inference. - - -This is how cuGraphOps fits into the cuGraph ecosystem -

-
- cuGraph -

diff --git a/readme_pages/cugraph_pyg.md b/readme_pages/cugraph_pyg.md deleted file mode 100644 index 147cd70b944..00000000000 --- a/readme_pages/cugraph_pyg.md +++ /dev/null @@ -1,22 +0,0 @@ -# cugraph_pyg - -[RAPIDS](https://rapids.ai) cugraph_pyg enables the ability to use cugraph Property Graphs with PyTorch Geometric (PyG). PyG users will have access to cuGraph and cuGraph-Service through the PyG GraphStore, FeatureStore, and Sampler interfaces. Through cugraph_pyg, PyG users have the full power of cuGraph's GPU-accelerated algorithms for graph analytics, such as sampling, centrality computation, and community detection. - - -The goal of `cugraph_pyg` is to enable accelerated single-GPU and multi-node, multi-GPU cugraph accelerated graphs to help train large-scale Graph Neural Networks (GNN) on PyG by providing duck-typed drop-in replacements of the `GraphStore`, `FeatureStore`, and `Sampler` interfaces backed by either cuGraph or cuGraph-Service. - -Users of cugraph_pyg have the option of installing either the cugraph or cugraph_service_client packages. Only one is required. - -## Usage -``` -G = cuGraph.PropertyGraph() -... -feature_store, graph_store = to_pyg(G) -sampler = CuGraphSampler( - data=(feature_store, graph_store), - shuffle=True, - num_neighbors=[10,25], - batch_size=50, -) -... -``` diff --git a/readme_pages/gnn_support.md b/readme_pages/gnn_support.md index 924c2bf62af..72978883531 100644 --- a/readme_pages/gnn_support.md +++ b/readme_pages/gnn_support.md @@ -27,6 +27,6 @@ An overview of GNN's and how they are used is found in this excellent [blog](htt RAPIDS GNN components improve other industy GNN specific projects. Due to the degree distribution of nodes, memory bottlenecks are the pain point for large scale graphs. To solve this problem, sampling operations form the backbone for Graph Neural Networks (GNN) training. However, current sampling methods provided by other libraries are not optimized enough for the whole process of GNN training. 
The main limit to performance is moving data between the hosts and devices. In cuGraph, we provide an end-to-end solution from data loading to training all on the GPUs. -CuGraph now supports compatibility with [Deep Graph Library](https://www.dgl.ai/) (DGL) and [PyTorch Geometric](https://pytorch-geometric.readthedocs.io/en/latest/) (PyG) by allowing conversion between a cuGraph object and a DGL or PyG object, making it possible for DGL and PyG users to access efficient data loader and graph operations (such as uniformed sampling) implementations in cuGraph, as well as keep their models unchanged in DGL or PyG. We have considerable speedup compared with the original implementation in DGL and PyG. +CuGraph now supports compatibility with [Deep Graph Library](https://www.dgl.ai/) (DGL) and [PyTorch Geometric](https://pytorch-geometric.readthedocs.io/en/latest/) (PyG) by allowing conversion between a cuGraph object and a DGL or PyG object, making it possible for DGL and PyG users to access efficient data loader and graph operations (such as uniform sampling) implementations in cuGraph, as well as keep their models unchanged in DGL or PyG. We have considerable speedup compared with the original implementation in DGL and PyG. The GNN packages are now developed within the [cugraph-gnn](https://github.com/rapidsai/cugraph-gnn) repository. [](https://developer.nvidia.com/blog/optimizing-fraud-detection-in-financial-services-with-graph-neural-networks-and-nvidia-gpus/)