diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp --- a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp +++ b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp @@ -18,7 +18,9 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIInstrInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/InitializePasses.h" using namespace llvm; @@ -37,6 +39,8 @@ class SIRemoveShortExecBranches : public MachineFunctionPass { private: const SIInstrInfo *TII = nullptr; + MachineLoopInfo *MLI = nullptr; + bool getBlockDestinations(MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB, MachineBasicBlock *&FalseMBB, @@ -53,11 +57,20 @@ } bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; } // End anonymous namespace. -INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(SIRemoveShortExecBranches, DEBUG_TYPE, + "SI remove short exec branches", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(SIRemoveShortExecBranches, DEBUG_TYPE, "SI remove short exec branches", false, false) char SIRemoveShortExecBranches::ID = 0; @@ -123,6 +136,7 @@ // Consider only the forward branches. 
if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) || + (MLI->getLoopFor(&SrcMBB) != MLI->getLoopFor(TrueMBB)) || mustRetainExeczBranch(*FalseMBB, *TrueMBB)) return false; @@ -136,6 +150,7 @@ bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); TII = ST.getInstrInfo(); + MLI = &getAnalysis<MachineLoopInfo>(); MF.RenumberBlocks(); bool Changed = false; diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll --- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -32,6 +32,7 @@ ; CHECK-NEXT: s_and_b64 s[8:9], s[8:9], exec ; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; CHECK-NEXT: s_andn2_b64 exec, exec, s[2:3] +; CHECK-NEXT: s_cbranch_execz BB0_6 ; CHECK-NEXT: BB0_3: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec @@ -49,7 +50,7 @@ ; CHECK-NEXT: s_add_i32 s0, s0, 1 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1 ; CHECK-NEXT: s_branch BB0_1 -; CHECK-NEXT: ; %bb.6: ; %Flow2 +; CHECK-NEXT: BB0_6: ; %Flow2 ; CHECK-NEXT: s_or_b64 exec, exec, s[2:3] ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]