Skip to content

Commit

Permalink
[ELF] Making cdsort default for function reordering (#68638)
Browse files Browse the repository at this point in the history
CDSort function reordering outperforms the existing default heuristic (
hfsort/C^3) in terms of the performance of generated binaries while
being (almost) as fast. Thus, the suggestion is to change the default.
The speedup is up to 1.5% perf for large front-end binaries, and can be
moderate/neutral for "small" benchmarks.

High-level **perf impact** on two selected binaries:
clang-10 binary (built with LTO+AutoFDO/CSSPGO): wins on top of C^3 in
[0.3%..0.8%]
rocksDB-8 binary (built with LTO+CSSPGO): wins on top of C^3 in
[0.8%..1.5%]

More detailed measurements on the clang binary is at
[here](https://reviews.llvm.org/D152834#4445042)
  • Loading branch information
spupyrev authored Oct 10, 2023
1 parent fc654b4 commit d5c1d73
Show file tree
Hide file tree
Showing 8 changed files with 12 additions and 18 deletions.
2 changes: 1 addition & 1 deletion lld/ELF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1095,7 +1095,7 @@ static void ltoValidateAllVtablesHaveTypeInfos(opt::InputArgList &args) {
}

static CGProfileSortKind getCGProfileSortKind(opt::InputArgList &args) {
StringRef s = args.getLastArgValue(OPT_call_graph_profile_sort, "hfsort");
StringRef s = args.getLastArgValue(OPT_call_graph_profile_sort, "cdsort");
if (s == "hfsort")
return CGProfileSortKind::Hfsort;
if (s == "cdsort")
Expand Down
4 changes: 2 additions & 2 deletions lld/docs/ld.lld.1
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ may be:
.It Cm none
Ignore call graph profile.
.It Cm hfsort
Use hfsort (default).
Use hfsort.
.It Cm cdsort
Use cdsort.
Use cdsort (default).
.El
.Pp
.It Fl -color-diagnostics Ns = Ns Ar value
Expand Down
2 changes: 1 addition & 1 deletion lld/test/ELF/cgprofile-bad-clusters.s
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# RUN: echo "F G 6" >> %t.call_graph
# RUN: echo "G H 5" >> %t.call_graph
# RUN: echo "H I 4" >> %t.call_graph
# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2
# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=hfsort -o %t2
# RUN: llvm-readobj --symbols %t2 | FileCheck %s

.section .text.A,"ax",@progbits
Expand Down
4 changes: 2 additions & 2 deletions lld/test/ELF/cgprofile-icf.s
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
# RUN: echo "A B 100" > %t.call_graph
# RUN: echo "A C 40" >> %t.call_graph
# RUN: echo "C D 61" >> %t.call_graph
# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t.out -icf=all
# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=hfsort -o %t.out -icf=all
# RUN: llvm-readobj --symbols %t.out | FileCheck %s
# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2.out
# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=hfsort -o %t2.out
# RUN: llvm-readobj --symbols %t2.out | FileCheck %s --check-prefix=NOICF

.section .text.D,"ax",@progbits
Expand Down
5 changes: 1 addition & 4 deletions lld/test/ELF/cgprofile-print.s
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# RUN: echo "B C 50" >> %t.call_graph
# RUN: echo "C D 40" >> %t.call_graph
# RUN: echo "D B 10" >> %t.call_graph
# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2 --print-symbol-order=%t3
# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2 --call-graph-profile-sort=hfsort --print-symbol-order=%t3
# RUN: FileCheck %s --input-file %t3

# CHECK: B
Expand All @@ -32,6 +32,3 @@ C:
.globl D
D:
nop



2 changes: 1 addition & 1 deletion lld/test/ELF/cgprofile-rela.test
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# REQUIRES: x86

# RUN: yaml2obj %s -o %t.o
# RUN: ld.lld %t.o -o %t
# RUN: ld.lld --call-graph-profile-sort=hfsort %t.o -o %t
# RUN: llvm-nm --no-sort %t | FileCheck %s
# RUN: ld.lld --no-call-graph-profile-sort %t.o -o %t
# RUN: llvm-nm --no-sort %t | FileCheck %s --check-prefix=NO-CG
Expand Down
5 changes: 1 addition & 4 deletions lld/test/ELF/cgprofile-reproduce.s
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# RUN: echo "B C 50" >> %t.call_graph
# RUN: echo "C D 40" >> %t.call_graph
# RUN: echo "D B 10" >> %t.call_graph
# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2 --print-symbol-order=%t3
# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2 --call-graph-profile-sort=hfsort --print-symbol-order=%t3
# RUN: ld.lld -e A %t --symbol-ordering-file %t3 -o %t2
# RUN: llvm-readobj --symbols %t2 | FileCheck %s

Expand Down Expand Up @@ -37,6 +37,3 @@ C:
.globl D
D:
nop



6 changes: 3 additions & 3 deletions lld/test/ELF/cgprofile-txt.s
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@
# RUN: echo "TooManyPreds10 TooManyPreds 11" >> %t.call_graph
# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=hfsort -o %t2
# RUN: llvm-readobj --symbols %t2 | FileCheck %s
## --call-graph-profile-sort=hfsort is the default.
# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2b
# RUN: cmp %t2 %t2b

# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=cdsort -o %t2
# RUN: llvm-readobj --symbols %t2 | FileCheck %s --check-prefix=CDSORT
## --call-graph-profile-sort=cdsort is the default.
# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2b
# RUN: cmp %t2 %t2b

# RUN: not ld.lld -e A %t --call-graph-ordering-file %t.call_graph --call-graph-profile-sort=sort \
# RUN: -o /dev/null 2>&1 | FileCheck %s --check-prefix=UNKNOWN
Expand Down

1 comment on commit d5c1d73

@asmok-g
Copy link

@asmok-g asmok-g commented on d5c1d73 Oct 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is failing lldb-arm-ubuntu checks and is now causing internal google cpp build benchmarks to fail building due to timeouts linking when target cpu is arm. Should this be reverted till checks are fixed ?

Please sign in to comment.