From a6181353a472a4ea2375e671890d3c085b9f8913 Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Sat, 18 Apr 2020 01:34:39 -0400 Subject: [PATCH] build: reduce linux release binary size by 87% Our Linux release binary was hilariously large, weighing in at nearly 800MB (!). Nearly all of the bloat was from DWARF debug info: $ bloaty materialized -n 10 FILE SIZE VM SIZE -------------- -------------- 24.5% 194Mi 0.0% 0 .debug_info 24.1% 191Mi 0.0% 0 .debug_loc 13.8% 109Mi 0.0% 0 .debug_pubtypes 10.1% 79.9Mi 0.0% 0 .debug_pubnames 8.8% 70.0Mi 0.0% 0 .debug_str 8.3% 66.3Mi 0.0% 0 .debug_ranges 4.4% 35.3Mi 0.0% 0 .debug_line 3.1% 24.8Mi 66.3% 24.8Mi .text 1.8% 14.4Mi 25.1% 9.39Mi [41 Others] 0.6% 4.79Mi 0.0% 0 .strtab 0.4% 3.22Mi 8.6% 3.22Mi .eh_frame 100.0% 793Mi 100.0% 37.4Mi TOTAL This patch gets a handle on this by attacking the problem from several angles: 1. We instruct the linker to compress debug info sections. Most of the debug info is redundant and compresses exceptionally well. Part of the reason we didn't notice the issue is that our Docker images and gzipped tarballs were relatively small (~150MB). 2. We strip out the unnecessary `.debug_pubnames` and `.debug_pubtypes` sections from the binary. This works around a known Rust bug (rust-lang/rust#46034). 3. We ask Rust to generate less debug info for release builds, limiting it to line info. This is enough information to symbolicate a backtrace, but not enough information to run an interactive debugger. This is usually the right tradeoff for a release build. 
$ bloaty materialized -n 10 VM SIZE FILE SIZE -------------- -------------- 0.0% 0 .debug_info 31.9Mi 33.8% 70.5% 25.0Mi .text 25.0Mi 26.5% 0.0% 0 .debug_str 7.54Mi 8.0% 0.0% 0 .debug_line 6.36Mi 6.7% 9.4% 3.33Mi [38 Others] 5.36Mi 5.7% 0.0% 0 .strtab 4.71Mi 5.0% 0.0% 0 .debug_ranges 3.55Mi 3.8% 8.8% 3.11Mi .eh_frame 3.11Mi 3.3% 0.0% 0 .symtab 2.87Mi 3.0% 6.0% 2.12Mi .rodata 2.12Mi 2.2% 5.4% 1.92Mi .gcc_except_table 1.92Mi 2.0% 100.0% 35.5Mi TOTAL 94.4Mi 100.0% One issue remains unsolved, which is that Rust/LLVM cannot currently garbage collect DWARF that refers to unused symbols/types. The actual symbols get cut from the binary, but their debug info remains. Follow rust-lang/rust#56068 and LLVM D74169 [0] if curious. I tested with the aforementioned lld patch (and none of the other changes) and it cut the binary down to 300MB. With the other changes, the savings are less substantial, but there is probably another 10MB to be had. [0]: https://reviews.llvm.org/D74169 --- .cargo/config | 8 ++++++++ .gitignore | 1 - Cargo.toml | 8 +++++++- bin/lint | 1 + misc/python/mzbuild.py | 37 ++++++++++++++++++++++++++++++------- 5 files changed, 46 insertions(+), 9 deletions(-) create mode 100644 .cargo/config diff --git a/.cargo/config b/.cargo/config new file mode 100644 index 0000000000000..e4c0c93aa37b4 --- /dev/null +++ b/.cargo/config @@ -0,0 +1,8 @@ +[target."x86_64-unknown-linux-gnu"] +# Compressing debug information can yield hundreds of megabytes of savings. +# The Rust toolchain does not currently perform dead code elimination on +# debug info. 
+# +# See: https://github.com/rust-lang/rust/issues/56068 +# See: https://reviews.llvm.org/D74169#1990180 +rustflags = ["-C", "link-arg=-Wl,--compress-debug-sections=zlib-gabi"] diff --git a/.gitignore b/.gitignore index 3e219c873a51b..d19e3bed16e89 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,6 @@ target miri-target -/.cargo .mtrlz.log **/*.rs.bk .netlify diff --git a/Cargo.toml b/Cargo.toml index a5fa23dd36ccf..eff750733e27a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,13 @@ members = [ ] [profile.release] -debug = true +# Emit only the line info tables, not full debug info, in release builds, to +# substantially reduce the size of the debug info. Line info tables are enough +# to symbolicate a backtrace, but not enough to use a debugger interactively. +# This seems to be the right tradeoff for release builds: it's unlikely we're +# going to get interactive access to a debugger in production installations, but +# we still want useful crash reports. +debug = 1 [patch.crates-io] # Waiting on a release with this commit: diff --git a/bin/lint b/bin/lint index 1aa020cdb2cad..8da770d9679ac 100755 --- a/bin/lint +++ b/bin/lint @@ -60,6 +60,7 @@ copyright_files=$(grep -vE \ -e '(^|/)\.gitmodules$' \ -e '(^|/)go\.sum$' \ -e '(^|/)Cargo\.toml$' \ + -e '^\.cargo/config$' \ -e '^Cargo\.lock$' \ -e '^deny\.toml$' \ -e '^netlify\.toml$' \ diff --git a/misc/python/mzbuild.py b/misc/python/mzbuild.py index a9d4339391572..1b66e2f30a63e 100644 --- a/misc/python/mzbuild.py +++ b/misc/python/mzbuild.py @@ -98,11 +98,15 @@ def xcargo_target_dir(root: Path) -> Path: return root / "target" / "x86_64-unknown-linux-gnu" -def xstrip(root: Path) -> str: +def xbinutil(tool: str) -> str: if sys.platform == "linux": - return "strip" + return tool else: - return "x86_64-unknown-linux-gnu-strip" + return f"x86_64-unknown-linux-gnu-{tool}" + + +xobjcopy = xbinutil("objcopy") +xstrip = xbinutil("strip") def docker_images() -> Set[str]: @@ -157,13 +161,32 @@ def run(self, root: 
Path, path: Path) -> None: # down CI, since we're packaging these binaries up into Docker # images and shipping them around. A bit unfortunate, since it'd be # nice to have useful backtraces if the binary crashes. - runv([xstrip(root), path / self.bin]) + runv([xstrip, path / self.bin]) + else: + # Even if we've been asked not to strip the binary, remove the + # `.debug_pubnames` and `.debug_pubtypes` sections. These are just + # indexes that speed up launching a debugger against the binary, + # and we're happy to have slower debugger start up in exchange for + # smaller binaries. Plus the sections have been obsoleted by a + # `.debug_names` section in DWARF 5, and so debugger support for + # `.debug_pubnames`/`.debug_pubtypes` is minimal anyway. + # See: https://github.com/rust-lang/rust/issues/46034 + runv( + [ + xobjcopy, + "-R", + ".debug_pubnames", + "-R", + ".debug_pubtypes", + path / self.bin, + ] + ) def depends(self, root: Path, path: Path) -> List[bytes]: # TODO(benesch): this should be much smarter about computing the Rust # files that actually contribute to this binary target. return super().depends(root, path) + git_ls_files( - root, "src/**", "Cargo.toml", "Cargo.lock" + root, "src/**", "Cargo.toml", "Cargo.lock", ".cargo" ) @@ -216,7 +239,7 @@ def run(self, root: Path, path: Path) -> None: with open(path / "tests" / "manifest", "w") as manifest: for (executable, slug, crate_path) in tests: shutil.copy(executable, path / "tests" / slug) - runv([xstrip(root), path / "tests" / slug]) + runv([xstrip, path / "tests" / slug]) manifest.write(f"{slug} {crate_path}\n") shutil.move(str(path / "testdrive"), path / "tests") shutil.copy( @@ -229,7 +252,7 @@ def depends(self, root: Path, path: Path) -> List[bytes]: # TODO(benesch): this should be much smarter about computing the Rust # files that actually contribute to this binary target. 
return super().depends(root, path) + git_ls_files( - root, "src/**", "Cargo.toml", "Cargo.lock" + root, "src/**", "Cargo.toml", "Cargo.lock", ".cargo" )