Skip to content

Commit

Permalink
support nvcc host compiler (facebookincubator#898)
Browse files Browse the repository at this point in the history
Summary:
Add `AIT_NVCC_CCBIN` env variable to set nvcc host compiler

Minor fixes:
- document `AIT_ENABLE_CUDA_LTO`
- eliminate an empty space caused by debug options

Pull Request resolved: facebookincubator#898

Reviewed By: sgrigory

Differential Revision: D48435098

Pulled By: aakhundov

fbshipit-source-id: 80160f9258fdd62a1df83115162f84c5db96b15c
  • Loading branch information
chengscott authored and facebook-github-bot committed Aug 20, 2023
1 parent 34340fb commit b5841ab
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 9 deletions.
4 changes: 4 additions & 0 deletions docs/source/reference/env.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ Codegen

**AIT_COMPILER_OPT**: The optimization level for the compiler, which is passed directly on the host compiler command line. AITemplate host code may be very light in certain cases, leaving nothing for the host compiler to optimize, so there is no need to make the host compiler perform time-consuming optimizations. Using "-O0" can be very useful when debugging GPU kernels. Default value is "-O3".

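**AIT_NVCC_CCBIN**: The host compiler for nvcc, passed to nvcc through its ``-ccbin`` option. If unset or empty, no ``-ccbin`` option is added to the nvcc command.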

**AIT_ENABLE_CUDA_LTO**: If set to "1", nvcc will use LTO flags during compilation. Default value is "0".

**AIT_TIME_COMPILATION**: If set to "1", time each make command at compilation time. This helps with compilation time analysis. Requires the `time <https://man7.org/linux/man-pages/man1/time.1.html>`_ package to be installed.

**AIT_MULTISTREAM_MODE**: Controls multi-stream mode. Default mode is "0".
Expand Down
11 changes: 8 additions & 3 deletions python/aitemplate/backend/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -844,14 +844,19 @@ def _gen_compiler_version_files(self, target_dir):
# for cache invalidation purposes (different compiler versions
# should not reuse same cached build artifacts )
cc = Target.current().cc()
compilers = {"main_compiler": cc}
compilers = {}
if "nvcc" in cc:
ccbin_match = re.search(r'-ccbin "?([^ "]+)', cc)
# extract the part before " -ccbin " as group #1
# and the content of the quoted expression (until
# the first space) after " -ccbin " as group #2
ccbin_match = re.search(r'(.*) -ccbin "?([^ "]+)', cc)
if ccbin_match:
nvcc_host_compiler = ccbin_match.group(1)
cc = ccbin_match.group(1)
nvcc_host_compiler = ccbin_match.group(2)
else:
nvcc_host_compiler = "g++" # default, using PATH resolution
compilers["nvcc_host_compiler"] = nvcc_host_compiler
compilers["main_compiler"] = cc

# Write compiler version string(s)
# into the build directory, to enable using them for cache hash determination
Expand Down
15 changes: 9 additions & 6 deletions python/aitemplate/backend/cuda/target_def.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,16 +135,16 @@ def _build_gnu_host_compiler_options(self) -> List[str]:
def get_host_compiler_options(self) -> List[str]:
    """Return the list of host compiler options.

    Delegates to ``_build_gnu_host_compiler_options`` and returns its
    result unchanged.
    """
    gnu_options = self._build_gnu_host_compiler_options()
    return gnu_options

def _get_nvcc_debug_options(self) -> List[str]:
    """Return the nvcc debug flags selected by the CUDA debug-level setting.

    The setting is read from ``environ.get_cuda_nvcc_debug_level()``.
    A numeric value selects a predefined flag set:

    * ``0`` -> no flags
    * ``1`` -> ``-lineinfo``
    * ``2`` -> ``-g -G``

    Any non-numeric value is treated as raw nvcc flags and split on
    whitespace, so each returned element is one non-empty argument. An
    empty or blank setting therefore yields ``[]`` instead of an empty
    option that would leave a stray space in the command line.

    Returns:
        The list of debug flags (possibly empty).

    Raises:
        AssertionError: if a numeric level is outside the range 0..2.
    """
    CUDA_DEBUG_LEVEL_STRINGS = [[], ["-lineinfo"], ["-g", "-G"]]
    level = environ.get_cuda_nvcc_debug_level()
    if level.isdigit():
        level = int(level)
        assert (
            level >= 0 and level < 3
        ), "Debug level out of range. Must be 0 (no debug info), 1 (lineinfo) or 2 (with debug info, disable opt)"
        return CUDA_DEBUG_LEVEL_STRINGS[level]
    # Custom flag string: one list element per flag, never an empty element.
    return level.split()

def _build_nvcc_compiler_options(self) -> List[str]:
code = [f"sm_{self._arch}", f"compute_{self._arch}"]
Expand All @@ -169,7 +169,7 @@ def _build_nvcc_compiler_options(self) -> List[str]:
"--source-in-ptx",
]
), # Annotate the ptx file with source information
options.append(self._get_nvcc_debug_options())
options.extend(self._get_nvcc_debug_options())
if self._ndebug == 1:
options.append("-DNDEBUG")
if environ.use_fast_math() and (
Expand Down Expand Up @@ -242,7 +242,10 @@ def __exit__(self, ptype, value, trace):
shutil.rmtree(self.lib_folder)

def cc(self):
    """Return the compiler command string.

    The result is ``nvcc``; when ``environ.nvcc_ccbin()`` returns a
    non-empty value, `` -ccbin <value>`` is appended to select the nvcc
    host compiler.
    """
    host_compiler = environ.nvcc_ccbin()
    if not host_compiler:
        return "nvcc"
    return "nvcc" + " -ccbin " + host_compiler

def compile_cmd(self, executable=False):
if executable:
Expand Down Expand Up @@ -443,7 +446,7 @@ def _build_compile_options(self):
"--source-in-ptx", # Annotate the ptx file with source information
]
),
options.append(self._get_nvcc_debug_options())
options.extend(self._get_nvcc_debug_options())
if self._ndebug == 1:
options.append("-DNDEBUG")
FBCUDA.static_compile_options_ = options
Expand Down
7 changes: 7 additions & 0 deletions python/aitemplate/utils/environ.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ def enable_cuda_lto() -> bool:
return os.getenv("AIT_ENABLE_CUDA_LTO", "0") == "1"


def nvcc_ccbin() -> str:
    """
    Return the nvcc host compiler (ccbin) taken from the ``AIT_NVCC_CCBIN``
    environment variable, or an empty string when the variable is not set.
    """
    return os.environ.get("AIT_NVCC_CCBIN", "")


def force_profiler_cache() -> bool:
"""
Force the profiler to use the cached results. The profiler will throw
Expand Down

0 comments on commit b5841ab

Please sign in to comment.