diff --git a/bolt/lib/Passes/LoopInversionPass.cpp b/bolt/lib/Passes/LoopInversionPass.cpp --- a/bolt/lib/Passes/LoopInversionPass.cpp +++ b/bolt/lib/Passes/LoopInversionPass.cpp @@ -54,11 +54,16 @@ } } - assert(SecondSucc != nullptr && "Unable to find second BB successor"); - const uint64_t BBCount = SuccBB->getBranchInfo(*BB).Count; - const uint64_t OtherCount = SuccBB->getBranchInfo(*SecondSucc).Count; - if ((BBCount < OtherCount) && (BBIndex > SuccBBIndex)) + assert(SecondSucc != nullptr && "Unable to find a second BB successor"); + const uint64_t LoopCount = SuccBB->getBranchInfo(*BB).Count; + const uint64_t ExitCount = SuccBB->getBranchInfo(*SecondSucc).Count; + + if (LoopCount < ExitCount) { + if (BBIndex > SuccBBIndex) + continue; + } else if (BBIndex < SuccBBIndex) { continue; + } IsChanged = true; BB->setLayoutIndex(SuccBBIndex); diff --git a/bolt/test/X86/loop-inversion-pass.s b/bolt/test/X86/loop-inversion-pass.s --- a/bolt/test/X86/loop-inversion-pass.s +++ b/bolt/test/X86/loop-inversion-pass.s @@ -4,24 +4,33 @@ # RUN: %s -o %t.o # RUN: link_fdata %s %t.o %t.fdata # RUN: link_fdata %s %t.o %t.fdata2 "FDATA2" +# RUN: link_fdata %s %t.o %t.fdata3 "FDATA3" # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q # RUN: llvm-bolt %t.exe -data %t.fdata -reorder-blocks=cache+ -print-finalized \ # RUN: -loop-inversion-opt -o %t.out | FileCheck %s # RUN: llvm-bolt %t.exe -data %t.fdata2 -reorder-blocks=cache+ -print-finalized \ # RUN: -loop-inversion-opt -o %t.out2 | FileCheck --check-prefix="CHECK2" %s +# RUN: llvm-bolt %t.exe -data %t.fdata3 -reorder-blocks=none -print-finalized \ +# RUN: -loop-inversion-opt -o %t.out3 | FileCheck --check-prefix="CHECK3" %s -# The case where loop is used: +# The case where the loop is used: # FDATA: 1 main 2 1 main #.J1# 0 420 # FDATA: 1 main b 1 main #.Jloop# 0 420 # FDATA: 1 main b 1 main d 0 1 # CHECK: BB Layout : .LBB00, .Ltmp0, .Ltmp1, .LFT0 -# The case where loop is unused: +# The case where the loop is unused: # FDATA2: 1 main 2 1 main #.J1# 0 420 # FDATA2: 1 main b 1 main #.Jloop# 0 1 # FDATA2: 1 main b 1 main d 0 420 # CHECK2: BB Layout : .LBB00, .Ltmp1, .LFT0, .Ltmp0 +# The case where the loop does not require rotation: +# FDATA3: 1 main 2 1 main #.J1# 0 420 +# FDATA3: 1 main b 1 main #.Jloop# 0 420 +# FDATA3: 1 main b 1 main d 0 1 +# CHECK3: BB Layout : .LBB00, .Ltmp0, .Ltmp1, .LFT0 + .text .globl main .type main, %function