GPU wheel improvements. (#1661)

- Test script: use intel-oneapi-mpi when testing wheels on BB5.
- Add `NRN_WHEEL_BUILD` variable for use inside the CMake configuration when tuning build options for Python wheels.
- Bump submodule past BlueBrain/CoreNeuron#785.
- Don't print single-MPI variables when dynamic MPI is enabled.
- When running `nrnivmodl` on user machines in GPU wheel installations, modify libnrniv.so before linking to it:
  - Remove link dependencies on bundled NVIDIA runtime libraries, so these are not duplicated when the same runtimes from the user's system are implicitly linked by `nvc++`.
  - Require `patchelf` on the user's system to achieve this.
- Don't add single-MPI include directories in dynamic MPI builds.
- Make the CMake logic to translate target dependencies into compiler flags more verbose.
- Build GPU wheels with `-tp=haswell` instead of the default (`-tp=host`), but remove this from the Makefiles used by `nrnivmodl` on user machines.

Co-authored-by: Pramod Kumbhar <[email protected]>
Co-authored-by: Alexandru Săvulescu <[email protected]>
neuronsimulator · Mar 8, 2022 · 4defcc3 · 4defcc3
1 parent b08f55f
commit 4defcc3
Show file tree

Hide file tree

Showing 9 changed files with 80 additions and 20 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -76,8 +76,10 @@ option(NRN_DYNAMIC_UNITS_USE_LEGACY "Use legacy units as default for dynamic uni
 # note that if CoreNEURON is enabled then it is not necessary to enable this option
 option(NRN_ENABLE_MOD_COMPATIBILITY "Enable CoreNEURON compatibility for MOD files" ${NRN_ENABLE_MOD_COMPATIBILITY_DEFAULT})
 option(NRN_ENABLE_REL_RPATH "Use relative RPATH in binaries. for relocatable installs/Python" ${NRN_ENABLE_REL_RPATH_DEFAULT})
+option(NRN_WHEEL_BUILD "Enable build settings specific to Python wheels." ${NRN_WHEEL_BUILD_DEFAULT})
 option(NRN_WHEEL_STATIC_READLINE "Use static readline libraries for the wheels." ${NRN_WHEEL_STATIC_READLINE_DEFAULT})
 mark_as_advanced(NRN_ENABLE_REL_RPATH)
+mark_as_advanced(NRN_WHEEL_BUILD)
 
 # =============================================================================
 # Build options (string)
@@ -731,8 +733,6 @@ else()
 endif()
 message(STATUS "MPI           | ${NRN_ENABLE_MPI}")
 if(NRN_ENABLE_MPI)
-  message(STATUS "  INC         | ${MPI_INCLUDE_PATH}")
-  message(STATUS "  LIB         | ${MPI_LIBRARY}")
   message(STATUS "  DYNAMIC     | ${NRN_ENABLE_MPI_DYNAMIC}")
   if(NRN_ENABLE_MPI_DYNAMIC)
     list(LENGTH NRN_MPI_LIBNAME_LIST _num_mpi)
@@ -743,6 +743,9 @@ if(NRN_ENABLE_MPI)
       message(STATUS "    LIBNAME   | ${libname}")
       message(STATUS "    INC       | ${include}")
     endforeach(val)
+  else()
+    message(STATUS "  INC         | ${MPI_INCLUDE_PATH}")
+    message(STATUS "  LIB         | ${MPI_LIBRARY}")
   endif()
 endif()
 message(STATUS "Python        | ${NRN_ENABLE_PYTHON}")

diff --git a/bin/nrnivmodl_makefile_cmake.in b/bin/nrnivmodl_makefile_cmake.in
@@ -29,13 +29,39 @@ datadir_lib := ${ROOT}/@CMAKE_INSTALL_DATADIR@/lib
 # - @NRN_COMPILE_DEFS
 # - @NRN_LINK_DEFS
 LDFLAGS = $(LINKFLAGS) $(UserLDFLAGS) @NRN_LINK_DEFS@
-NRNLIB_FLAGS = -L$(libdir) -lnrniv
+
+# In GPU wheel distributions, the shipped libnrniv.so is linked against some
+# NVIDIA runtime libraries that are shipped with the wheel. If we use
+# nrnivmodl on the user machine, the NVIDIA compilers will link the local
+# versions of these libraries too, causing duplication.
+libnrniv_without_nvidia: $(libdir)/libnrniv.so
+	cp -v $(libdir)/libnrniv.so $(OUTPUT)/libnrniv-without-nvidia.so
+	patchelf $(OUTPUT)/libnrniv-without-nvidia.so --print-needed | grep '^libnv\(hpcatm\|omp\|cpumath\|cpumath-avx2\|c\)-[a-f0-9]\{8\}\.so' | xargs -t -r -n 1 patchelf $(OUTPUT)/libnrniv-without-nvidia.so --remove-needed
+	patchelf $(OUTPUT)/libnrniv-without-nvidia.so --set-soname libnrniv-without-nvidia.so
+	patchelf $(OUTPUT)/libnrniv-without-nvidia.so --print-rpath
+	ldd $(OUTPUT)/libnrniv-without-nvidia.so
+
+# In a GPU wheel build we need to fudge libnrniv.so before linking to it.
+# NRNDEMO should be set when we run this as part of the wheel build, in
+# which case we do *not* want this hack.
+ifeq (@NRN_ENABLE_CORENEURON@@NRN_WHEEL_BUILD@@CORENRN_ENABLE_GPU@$(if $(NRNDEMO),OFF,ON), ONONONON)
+  NRNLIB_FLAGS = -L$(OUTPUT) -lnrniv-without-nvidia
+  NRNLIB_RPATH_FLAGS = -Wl,-rpath,\$$ORIGIN -Wl,-rpath,\$$ORIGIN/..
+  nrn_lib = libnrniv_without_nvidia
+else
+  NRNLIB_FLAGS = -L$(libdir) -lnrniv
+  NRNLIB_RPATH_FLAGS = -Wl,-rpath,$(libdir)
+  nrn_lib =
+endif
+
 OS_NAME := $(shell uname)
 _cm =,
 
 # We rebuild the include dirs since a lot of stuff changes place
 INCLUDES = -I. $(INCFLAGS) $(UserINCFLAGS) -I$(incdir)
-INCLUDES += $(if @MPI_C_INCLUDE_PATH@, -I$(subst ;, -I,@MPI_C_INCLUDE_PATH@),)
+ifeq (@NRN_ENABLE_MPI_DYNAMIC@, OFF)
+  INCLUDES += $(if @MPI_C_INCLUDE_PATH@, -I$(subst ;, -I,@MPI_C_INCLUDE_PATH@),)
+endif
 
 # CC/CXX are always defined. If the definition comes from default change it
 ifeq ($(origin CC), default)
@@ -104,13 +130,13 @@ special: $(mech_lib)
 
 $(mech_lib): $(mech_lib_type)
 
-mech_lib_shared: mod_func.o $(mod_objs) $(c_objs) build_always
+mech_lib_shared: mod_func.o $(mod_objs) $(c_objs) $(nrn_lib) build_always
 	@printf " => $(C_GREEN)LINKING$(C_RESET) shared library $(mech_lib)\n"
 	$(CXX_LINK_SHARED) -I $(incdir) -o ${mech_lib} ${_SONAME} \
-	  $(mod_func_o) $(mod_objs) $(c_objs) $(NRNLIB_FLAGS) -Wl,-rpath,$(libdir) $(LDFLAGS)
+	  $(mod_func_o) $(mod_objs) $(c_objs) $(NRNLIB_FLAGS) $(NRNLIB_RPATH_FLAGS) $(LDFLAGS)
 	rm -f $(OBJS_DIR)/.libs/libnrnmech.so ; mkdir -p $(OBJS_DIR)/.libs ; cp $(mech_lib) $(OBJS_DIR)/.libs/libnrnmech.so
 
-mech_lib_static: mod_func.o $(mod_objs) $(c_objs) build_always
+mech_lib_static: mod_func.o $(mod_objs) $(c_objs) $(nrn_lib) build_always
 	@printf " => $(C_GREEN)LINKING$(C_RESET) static library $(mech_lib)\n"
 	ar cq ${mech_lib} $(mod_func_o) $(mod_objs) $(cobjs);
 

diff --git a/cmake/BuildOptionDefaults.cmake b/cmake/BuildOptionDefaults.cmake
@@ -38,6 +38,7 @@ set(PYTHON_EXECUTABLE_DEFAULT "")
 set(IV_LIB_DEFAULT "")
 
 # For wheel deployment
+set(NRN_WHEEL_BUILD_DEFAULT OFF)
 set(NRN_WHEEL_STATIC_READLINE_DEFAULT OFF)
 
 # we add some coreneuron options in order to check

diff --git a/cmake/CMakeListsNrnMech.cmake b/cmake/CMakeListsNrnMech.cmake
@@ -30,16 +30,14 @@ foreach(link_lib ${NRN_LINK_LIBS})
 
   get_filename_component(dir_path ${link_lib} DIRECTORY)
   if(TARGET ${link_lib})
-    message(NOTICE "Using Target in compiling and linking, you should take care of it if
-                    it fail miserabily (CODE: 1234567890)")
     get_property(link_flag TARGET ${link_lib} PROPERTY INTERFACE_LINK_LIBRARIES)
-    string(APPEND NRN_LINK_DEFS ${link_flag})
+    set(description "Extracting link flags from target '${link_lib}', beware that this can be fragile.")
     # Not use it yet because it can be generator expressions
     # get_property(compile_flag TARGET ${link_lib} PROPERTY INTERFACE_COMPILE_OPTIONS)
     # string(APPEND NRN_COMPILE_DEFS ${compile_flag})
-    continue()
   elseif(NOT dir_path)
-    string(APPEND NRN_LINK_DEFS " -l${link_lib}")
+    set(link_flag "-l${link_lib}")
+    set(description "Generating link flags from name '${link_lib}', beware that this can be fragile.")
   # avoid library paths from special directory /nrnwheel which
   # used to build wheels under docker container
   elseif("${dir_path}" MATCHES "^/nrnwheel")
@@ -49,10 +47,14 @@ foreach(link_lib ${NRN_LINK_LIBS})
     get_filename_component(libname ${link_lib} NAME)
     string(REGEX REPLACE "\\.[^.]*$" "" libname_wle ${libname})
     string(REGEX REPLACE "^lib" "" libname_wle ${libname_wle})
-    string(APPEND NRN_LINK_DEFS " -l${libname_wle}")
+    set(link_flag "-l${libname_wle}")
+    set(description "Extracting link flags from path '${link_lib}', beware that this can be fragile.")
   else()
-    string(APPEND NRN_LINK_DEFS " ${link_lib} -Wl,-rpath,${dir_path}")
+    set(link_flag "${link_lib} -Wl,-rpath,${dir_path}")
+    set(description "Generating link flags from path ${link_lib}")
   endif()
+  message(NOTICE "${description} Got: ${link_flag}")
+  string(APPEND NRN_LINK_DEFS " ${link_flag}")
 endforeach()
 
 # PGI add --c++11;-A option for c++11 flag

diff --git a/external/coreneuron b/external/coreneuron
diff --git a/packaging/python/build_wheels.bash b/packaging/python/build_wheels.bash
@@ -84,7 +84,7 @@ build_wheel_linux() {
 
     CMAKE_DEFS="NRN_MPI_DYNAMIC=$3"
     if [ "$USE_STATIC_READLINE" == "1" ]; then
-      CMAKE_DEFS="$CMAKE_DEFS,NRN_WHEEL_STATIC_READLINE=ON"
+      CMAKE_DEFS="$CMAKE_DEFS,NRN_WHEEL_BUILD=ON,NRN_WHEEL_STATIC_READLINE=ON"
     fi
 
     if [ "$2" == "coreneuron" ]; then
@@ -96,10 +96,11 @@ build_wheel_linux() {
         source ~/.bashrc
         module load nvhpc
         unset CC CXX
+        # Make the NVIDIA compilers target haswell CPUs (-tp=haswell). For CUDA,
         # the default is currently 70;80, partly because NVHPC does not
         # support OpenMP target offload with 60. Wheels use mod2c and
         # OpenACC for now, so we can be a little more generic.
-        CMAKE_DEFS="${CMAKE_DEFS},CMAKE_CUDA_ARCHITECTURES=60;70;80"
+        CMAKE_DEFS="${CMAKE_DEFS},CMAKE_CUDA_ARCHITECTURES=60;70;80,CMAKE_C_FLAGS=-tp=haswell,CMAKE_CXX_FLAGS=-tp=haswell"
     fi
 
     python setup.py build_ext --cmake-prefix="/nrnwheel/ncurses;/nrnwheel/readline" --cmake-defs="$CMAKE_DEFS" $setup_args bdist_wheel
@@ -144,7 +145,7 @@ build_wheel_osx() {
 
     CMAKE_DEFS="NRN_MPI_DYNAMIC=$3"
     if [ "$USE_STATIC_READLINE" == "1" ]; then
-      CMAKE_DEFS="$CMAKE_DEFS,NRN_WHEEL_STATIC_READLINE=ON"
+      CMAKE_DEFS="$CMAKE_DEFS,NRN_WHEEL_BUILD=ON,NRN_WHEEL_STATIC_READLINE=ON"
     fi
 
     # We need to "fix" the platform tag if the Python installer is universal2

diff --git a/packaging/python/fix_target_processor_in_makefiles.sh b/packaging/python/fix_target_processor_in_makefiles.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+set -ex
+
+instdir="$1"
+
+# We build the GPU wheels with -tp=haswell for portability, but we strip this
+# flag from the Makefiles shipped in the wheels so that users who run
+# nrnivmodl themselves (hopefully) get better performance.
+for makefile in bin/nrnmech_makefile share/coreneuron/nrnivmodl_core_makefile
+do
+  sed -i.old -e 's#-tp=haswell##g' "${instdir}/${makefile}"
+  ! diff -u "${instdir}/${makefile}.old" "${instdir}/${makefile}"
+  rm "${instdir}/${makefile}.old"
+done
diff --git a/packaging/python/test_wheels.sh b/packaging/python/test_wheels.sh
@@ -220,7 +220,7 @@ run_parallel_test() {
     # BB5 with multiple MPI libraries
     elif [[ $(hostname -f) = *r*bbp.epfl.ch* ]]; then
       run_mpi_test "srun" "HPE-MPT" "hpe-mpi"
-      run_mpi_test "mpirun" "Intel MPI" "intel-mpi"
+      run_mpi_test "mpirun" "Intel MPI" "intel-oneapi-mpi"
       run_mpi_test "srun" "MVAPICH2" "mvapich2"
 
     # circle-ci build

diff --git a/setup.py b/setup.py
@@ -264,6 +264,15 @@ def _run_cmake(self, ext):
                     cwd=self.build_temp,
                     env=env,
                 )
+                if Components.GPU:
+                    subprocess.check_call(
+                        [
+                            ext.sourcedir + "/packaging/python/fix_target_processor_in_makefiles.sh",
+                            ext.cmake_install_prefix,
+                        ],
+                        cwd=self.build_temp,
+                        env=env,
+                    )
 
         except subprocess.CalledProcessError as exc:
             log.error("Status : FAIL. Log:\n%s", exc.output)
@@ -422,6 +431,10 @@ def setup_package():
     # For CI, we want to build separate wheel with "-nightly" suffix
     package_name += os.environ.get("NEURON_NIGHTLY_TAG", "-nightly")
 
+    # GPU wheels use patchelf to avoid duplicating NVIDIA runtime libraries when
+    # using nrnivmodl.
+    maybe_patchelf = ['patchelf'] if Components.GPU else []
+
     setup(
         name=package_name,
         version=__version__,
@@ -435,7 +448,7 @@ def setup_package():
             if f[0] != "_"
         ],
         cmdclass=dict(build_ext=CMakeAugmentedBuilder, docs=Docs),
-        install_requires=["numpy>=1.9.3"],
+        install_requires=["numpy>=1.9.3"] + maybe_patchelf,
         tests_require=["flake8", "pytest"],
         setup_requires=["wheel"] + maybe_docs + maybe_test_runner + maybe_rxd_reqs,
         dependency_links=[],
+12 −3		.github/pull_request_template.md
+3 −3		.github/workflows/test-as-submodule.yml
+2 −0		.gitlab-ci.yml
+27 −15		CMake/OpenAccHelper.cmake
+23 −3		CMakeLists.txt
+11 −1		coreneuron/CMakeLists.txt
+8 −6		coreneuron/apps/main1.cpp
+12 −17		coreneuron/io/output_spikes.cpp
+2 −2		coreneuron/io/output_spikes.hpp
+8 −6		coreneuron/io/reports/nrnreport.hpp
+6 −7		coreneuron/io/reports/report_configuration_parser.cpp
+21 −5		coreneuron/io/reports/sonata_report_handler.cpp
+7 −2		coreneuron/io/reports/sonata_report_handler.hpp
+3 −2		coreneuron/mechanism/eion.cpp
+27 −3		coreneuron/utils/randoms/nrnran123.cu
+13 −1		extra/nrnivmodl_core_makefile.in
+6 −1		tests/integration/reportinglib/1.check.in
+ −		tests/integration/reportinglib/1.report