From 814fa9954b2436f228adb761205e03aaf1272703 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 30 May 2024 21:02:14 +0800 Subject: [PATCH 1/3] Move the rabit poll helper. Cleanup all references to the rabit directory. --- CMakeLists.txt | 1 - R-package/CMakeLists.txt | 2 -- R-package/src/Makevars.in | 1 - R-package/src/Makevars.win | 1 - cmake/Utils.cmake | 1 - demo/c-api/basic/Makefile | 2 +- doc/build.rst | 2 +- doc/faq.rst | 2 +- .../socket.h => include/xgboost/collective/poll_utils.h | 0 jvm-packages/CMakeLists.txt | 3 +-- python-package/packager/sdist.py | 1 - src/collective/loop.cc | 8 ++++---- src/collective/socket.cc | 4 ++-- src/collective/tracker.cc | 9 +++++---- tests/ci_build/deploy_jvm_packages.sh | 2 +- tests/ci_build/test_r_package.py | 4 ---- tests/ci_build/tidy.py | 1 - tests/cpp/CMakeLists.txt | 6 ++---- 18 files changed, 18 insertions(+), 32 deletions(-) rename rabit/include/rabit/internal/socket.h => include/xgboost/collective/poll_utils.h (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index c69b0d2a3dc7..a07cff734d56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -346,7 +346,6 @@ if(BUILD_DEPRECATED_CLI) PRIVATE ${xgboost_SOURCE_DIR}/include ${xgboost_SOURCE_DIR}/dmlc-core/include - ${xgboost_SOURCE_DIR}/rabit/include ) set_target_properties(runxgboost PROPERTIES OUTPUT_NAME xgboost) xgboost_target_properties(runxgboost) diff --git a/R-package/CMakeLists.txt b/R-package/CMakeLists.txt index 37c5dbf4c1ed..75c3e2d77449 100644 --- a/R-package/CMakeLists.txt +++ b/R-package/CMakeLists.txt @@ -29,7 +29,6 @@ target_compile_definitions( -DDMLC_LOG_BEFORE_THROW=0 -DDMLC_DISABLE_STDIN=1 -DDMLC_LOG_CUSTOMIZE=1 - -DRABIT_STRICT_CXX98_ ) target_include_directories( @@ -37,7 +36,6 @@ target_include_directories( ${LIBR_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/dmlc-core/include - ${PROJECT_SOURCE_DIR}/rabit/include ) target_link_libraries(xgboost-r PUBLIC ${LIBR_CORE_LIBRARY}) diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index 93cfb8e5b4c1..0cabffcad3c8 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v))) PKG_CPPFLAGS = \ -I$(PKGROOT)/include \ -I$(PKGROOT)/dmlc-core/include \ - -I$(PKGROOT)/rabit/include \ -I$(PKGROOT) \ $(XGB_RFLAGS) diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index f160930e8a4a..c49006c5e0a6 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v))) PKG_CPPFLAGS = \ -I$(PKGROOT)/include \ -I$(PKGROOT)/dmlc-core/include \ - -I$(PKGROOT)/rabit/include \ -I$(PKGROOT) \ $(XGB_RFLAGS) diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index 317a71c00d22..9006bb0ea084 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -151,7 +151,6 @@ function(xgboost_set_cuda_flags target) target_include_directories( ${target} PRIVATE ${xgboost_SOURCE_DIR}/gputreeshap - ${xgboost_SOURCE_DIR}/rabit/include ${CUDAToolkit_INCLUDE_DIRS}) if(MSVC) diff --git a/demo/c-api/basic/Makefile b/demo/c-api/basic/Makefile index 345079fa9a75..dceb9bc73a11 100644 --- a/demo/c-api/basic/Makefile +++ b/demo/c-api/basic/Makefile @@ -4,7 +4,7 @@ TGT=c-api-demo cc=cc CFLAGS ?=-O3 XGBOOST_ROOT ?=../.. -INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include -I$(XGBOOST_ROOT)/rabit/include +INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include LIB_DIR=-L$(XGBOOST_ROOT)/lib build: $(TGT) diff --git a/doc/build.rst b/doc/build.rst index 4a2d47c0f885..fda64820ad04 100644 --- a/doc/build.rst +++ b/doc/build.rst @@ -138,7 +138,7 @@ From the command line on Linux starting from the XGBoost directory: .. note:: Faster distributed GPU training with NCCL - By default, distributed GPU training is enabled and uses Rabit for communication. For faster training, set the option ``USE_NCCL=ON``. Faster distributed GPU training depends on NCCL2, available at `this link `_. Since NCCL2 is only available for Linux machines, **faster distributed GPU training is available only for Linux**. + By default, distributed GPU training is enabled with the option ``USE_NCCL=ON``. Distributed GPU training depends on NCCL2, available at `this link `_. Since NCCL2 is only available for Linux machines, **Distributed GPU training is available only for Linux**. .. code-block:: bash diff --git a/doc/faq.rst b/doc/faq.rst index 4fe63076c18b..cdfb8bc2cb3c 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -37,7 +37,7 @@ The ultimate question will still come back to how to push the limit of each comp and use less resources to complete the task (thus with less communication and chance of failure). To achieve these, we decide to reuse the optimizations in the single node XGBoost and build the distributed version on top of it. -The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs (in our case rabit). +The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs. Such design allows us to reuse most of the code, while being portable to major platforms such as Hadoop/Yarn, MPI, SGE. Most importantly, it pushes the limit of the computation resources we can use. diff --git a/rabit/include/rabit/internal/socket.h b/include/xgboost/collective/poll_utils.h similarity index 100% rename from rabit/include/rabit/internal/socket.h rename to include/xgboost/collective/poll_utils.h diff --git a/jvm-packages/CMakeLists.txt b/jvm-packages/CMakeLists.txt index 36ed61a6b063..c6353d4b7400 100644 --- a/jvm-packages/CMakeLists.txt +++ b/jvm-packages/CMakeLists.txt @@ -21,7 +21,6 @@ target_include_directories(xgboost4j ${JNI_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native ${PROJECT_SOURCE_DIR}/include - ${PROJECT_SOURCE_DIR}/dmlc-core/include - ${PROJECT_SOURCE_DIR}/rabit/include) + ${PROJECT_SOURCE_DIR}/dmlc-core/include) set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib) diff --git a/python-package/packager/sdist.py b/python-package/packager/sdist.py index 4c70c24fe8cc..013a28022009 100644 --- a/python-package/packager/sdist.py +++ b/python-package/packager/sdist.py @@ -18,7 +18,6 @@ def copy_cpp_src_tree( "include", "dmlc-core", "gputreeshap", - "rabit", "cmake", "plugin", ]: diff --git a/src/collective/loop.cc b/src/collective/loop.cc index 8e84b6a82c84..cd7740fd2805 100644 --- a/src/collective/loop.cc +++ b/src/collective/loop.cc @@ -14,10 +14,10 @@ #include // for thread #include // for move -#include "rabit/internal/socket.h" // for PollHelper -#include "xgboost/collective/result.h" // for Fail, Success -#include "xgboost/collective/socket.h" // for FailWithCode -#include "xgboost/logging.h" // for CHECK +#include "xgboost/collective/poll_utils.h" // for PollHelper +#include "xgboost/collective/result.h" // for Fail, Success +#include "xgboost/collective/socket.h" // for FailWithCode +#include "xgboost/logging.h" // for CHECK namespace xgboost::collective { Result Loop::ProcessQueue(std::queue* p_queue) const { diff --git a/src/collective/socket.cc b/src/collective/socket.cc index 99b02f665f10..38e727caf836 100644 --- a/src/collective/socket.cc +++ b/src/collective/socket.cc @@ -11,8 +11,8 @@ #include // for error_code, system_category #include // for sleep_for -#include "rabit/internal/socket.h" // for PollHelper -#include "xgboost/collective/result.h" // for Result +#include "xgboost/collective/poll_utils.h" // for PollHelper +#include "xgboost/collective/result.h" // for Result #if defined(__unix__) || defined(__APPLE__) #include // getaddrinfo, freeaddrinfo diff --git a/src/collective/tracker.cc b/src/collective/tracker.cc index c8776f294690..9441ab449080 100644 --- a/src/collective/tracker.cc +++ b/src/collective/tracker.cc @@ -1,7 +1,7 @@ /** * Copyright 2023-2024, XGBoost Contributors */ -#include "rabit/internal/socket.h" + #if defined(__unix__) || defined(__APPLE__) #include // gethostbyname #include // socket, AF_INET6, AF_INET, connect, getsockname @@ -27,9 +27,10 @@ #include "comm.h" #include "protocol.h" // for kMagic, PeerInfo #include "tracker.h" -#include "xgboost/collective/result.h" // for Result, Fail, Success -#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ... -#include "xgboost/json.h" // for Json +#include "xgboost/collective/poll_utils.h" // for PollHelper +#include "xgboost/collective/result.h" // for Result, Fail, Success +#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ... +#include "xgboost/json.h" // for Json namespace xgboost::collective { diff --git a/tests/ci_build/deploy_jvm_packages.sh b/tests/ci_build/deploy_jvm_packages.sh index 9531d79a9937..7c909483acaa 100755 --- a/tests/ci_build/deploy_jvm_packages.sh +++ b/tests/ci_build/deploy_jvm_packages.sh @@ -17,7 +17,7 @@ cd jvm-packages rm -rf $(find . -name target) rm -rf ../build/ -# Re-build package without Mock Rabit +# Re-build package # Maven profiles: # `default` includes modules: xgboost4j, xgboost4j-spark, xgboost4j-flink, xgboost4j-example # `gpu` includes modules: xgboost4j-gpu, xgboost4j-spark-gpu, sets `use.cuda = ON` diff --git a/tests/ci_build/test_r_package.py b/tests/ci_build/test_r_package.py index 1fe1644add1f..add31b97313c 100644 --- a/tests/ci_build/test_r_package.py +++ b/tests/ci_build/test_r_package.py @@ -50,10 +50,6 @@ def pkgroot(path: str) -> None: shutil.copytree("src", dest / "src" / "src") shutil.copytree("include", dest / "src" / "include") shutil.copytree("amalgamation", dest / "src" / "amalgamation") - # rabit - rabit = Path("rabit") - os.mkdir(dest / "src" / rabit) - shutil.copytree(rabit / "include", dest / "src" / "rabit" / "include") # dmlc-core dmlc_core = Path("dmlc-core") os.mkdir(dest / "src" / dmlc_core) diff --git a/tests/ci_build/tidy.py b/tests/ci_build/tidy.py index 16fb07e0d1c2..494365a843ea 100755 --- a/tests/ci_build/tidy.py +++ b/tests/ci_build/tidy.py @@ -192,7 +192,6 @@ def _configure(self): def should_lint(path): if not self.cpp_lint and path.endswith('.cc'): return False - isxgb = path.find('rabit') == -1 isxgb = isxgb and path.find('dmlc-core') == -1 isxgb = isxgb and (not path.startswith(self.cdb_path)) if isxgb: diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt index 2748e13098b6..6496f8af45de 100644 --- a/tests/cpp/CMakeLists.txt +++ b/tests/cpp/CMakeLists.txt @@ -25,8 +25,7 @@ if(PLUGIN_SYCL) PRIVATE ${gtest_SOURCE_DIR}/include ${xgboost_SOURCE_DIR}/include - ${xgboost_SOURCE_DIR}/dmlc-core/include - ${xgboost_SOURCE_DIR}/rabit/include) + ${xgboost_SOURCE_DIR}/dmlc-core/include) target_compile_definitions(plugin_sycl_test PUBLIC -DXGBOOST_USE_SYCL=1) target_link_libraries(plugin_sycl_test PUBLIC -fsycl) @@ -66,8 +65,7 @@ target_include_directories(testxgboost PRIVATE ${GTEST_INCLUDE_DIRS} ${xgboost_SOURCE_DIR}/include - ${xgboost_SOURCE_DIR}/dmlc-core/include - ${xgboost_SOURCE_DIR}/rabit/include) + ${xgboost_SOURCE_DIR}/dmlc-core/include) target_link_libraries(testxgboost PRIVATE GTest::gtest GTest::gmock) From 546d5535d5702e9ea7ccf939dfe2de3d0af9dc19 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 30 May 2024 21:06:18 +0800 Subject: [PATCH 2/3] lint. --- include/xgboost/collective/poll_utils.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/include/xgboost/collective/poll_utils.h b/include/xgboost/collective/poll_utils.h index 3701146d4577..514e0a5c6633 100644 --- a/include/xgboost/collective/poll_utils.h +++ b/include/xgboost/collective/poll_utils.h @@ -3,8 +3,7 @@ * \file socket.h * \author Tianqi Chen */ -#ifndef RABIT_INTERNAL_SOCKET_H_ -#define RABIT_INTERNAL_SOCKET_H_ +#pragma once #include "xgboost/collective/result.h" #include "xgboost/collective/socket.h" @@ -61,8 +60,8 @@ using sock_size_t = size_t; // NOLINT #pragma message("Distributed training on mingw is not supported.") typedef struct pollfd { SOCKET fd; - short events; - short revents; + short events; // NOLINT + short revents; // NOLINT } WSAPOLLFD, *PWSAPOLLFD, *LPWSAPOLLFD; // POLLRDNORM | POLLRDBAND @@ -97,7 +96,8 @@ std::enable_if_t, xgboost::collective::Result> PollError(E if ((revents & POLLERR) != 0) { auto err = errno; auto str = strerror(err); - return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + std::string{str} + + return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + // NOLINT + std::string{str} + // NOLINT " code:" + std::to_string(err)); } if ((revents & POLLNVAL) != 0) { @@ -229,5 +229,3 @@ struct PollHelper { #undef POLLPRI #undef POLLOUT #endif // IS_MINGW() - -#endif // RABIT_INTERNAL_SOCKET_H_ From 4e8c212dd31cfe16310efd549744908f790be070 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 30 May 2024 21:22:42 +0800 Subject: [PATCH 3/3] tidy. --- tests/ci_build/tidy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci_build/tidy.py b/tests/ci_build/tidy.py index 494365a843ea..7116eb78e039 100755 --- a/tests/ci_build/tidy.py +++ b/tests/ci_build/tidy.py @@ -192,7 +192,7 @@ def _configure(self): def should_lint(path): if not self.cpp_lint and path.endswith('.cc'): return False - isxgb = isxgb and path.find('dmlc-core') == -1 + isxgb = path.find('dmlc-core') == -1 isxgb = isxgb and (not path.startswith(self.cdb_path)) if isxgb: print(path)