diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index bc67117113719..0f55af3b6eddf 100644 --- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -347,9 +347,19 @@ static void updateBranchWeights(BranchInst &PreHeaderBI, BranchInst &LoopBI, // probabilities as if there are only 0-trip and 1-trip cases. ExitWeight0 = OrigLoopExitWeight - OrigLoopBackedgeWeight; } + } else { + // Theoretically, if the loop body must be executed at least once, the + // backedge count must not be less than the exit count. However, the branch + // weights collected by sampling-based PGO may be inaccurate. Therefore this + // workaround is required here to avoid unsigned underflow in the following + // update of the branch weights. + if (OrigLoopExitWeight > OrigLoopBackedgeWeight) + OrigLoopBackedgeWeight = OrigLoopExitWeight; } + assert(OrigLoopExitWeight >= ExitWeight0 && "Bad branch weight"); ExitWeight1 = OrigLoopExitWeight - ExitWeight0; EnterWeight = ExitWeight1; + assert(OrigLoopBackedgeWeight >= EnterWeight && "Bad branch weight"); LoopBackWeight = OrigLoopBackedgeWeight - EnterWeight; } else if (OrigLoopExitWeight == 0) { if (OrigLoopBackedgeWeight == 0) { diff --git a/llvm/test/Transforms/LoopRotate/update-branch-weights.ll b/llvm/test/Transforms/LoopRotate/update-branch-weights.ll index acb2038d17bb8..9a1f36ec5ff2b 100644 --- a/llvm/test/Transforms/LoopRotate/update-branch-weights.ll +++ b/llvm/test/Transforms/LoopRotate/update-branch-weights.ll @@ -240,7 +240,7 @@ loop_exit: ; BFI_AFTER-LABEL: block-frequency-info: func6_inaccurate_branch_weight ; BFI_AFTER: - entry: {{.*}} count = 1024 -; BFI_AFTER: - loop_body: {{.*}} count = 4294967296 +; BFI_AFTER: - loop_body: {{.*}} count = 1024 ; BFI_AFTER: - loop_exit: {{.*}} count = 1024 ; IR-LABEL: define void @func6_inaccurate_branch_weight( @@ -292,4 +292,4 @@ loop_exit: ; IR: [[PROF_FUNC3_0]] = 
!{!"branch_weights", i32 0, i32 1} ; IR: [[PROF_FUNC4_0]] = !{!"branch_weights", i32 1, i32 0} ; IR: [[PROF_FUNC5_0]] = !{!"branch_weights", i32 0, i32 0} -; IR: [[PROF_FUNC6_0]] = !{!"branch_weights", i32 -1, i32 1024} +; IR: [[PROF_FUNC6_0]] = !{!"branch_weights", i32 0, i32 1024}