Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1276,23 +1276,30 @@
   return SinkCast(CI);
 }
 
+bool isIVIncrement(const BinaryOperator *BO, const LoopInfo *LI) {
+  auto *PN = dyn_cast<PHINode>(BO->getOperand(0));
+  if (!PN)
+    return false;
+  const Loop *L = LI->getLoopFor(BO->getParent());
+  if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
+    return false;
+  const BasicBlock *Latch = L->getLoopLatch();
+  if (PN->getIncomingValueForBlock(Latch) != BO)
+    return false;
+  if (auto *Step = dyn_cast<Instruction>(BO->getOperand(1)))
+    if (L->contains(Step->getParent()))
+      return false;
+  return true;
+}
+
 bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
                                                  Value *Arg0, Value *Arg1,
                                                  CmpInst *Cmp,
                                                  Intrinsic::ID IID) {
-  auto isIVIncrement = [this, &Cmp](BinaryOperator *BO) {
-    auto *PN = dyn_cast<PHINode>(BO->getOperand(0));
-    if (!PN)
+  auto isSimpleIVIncrement = [this, &Cmp](BinaryOperator *BO) {
+    if (!isIVIncrement(BO, LI))
       return false;
     const Loop *L = LI->getLoopFor(BO->getParent());
-    if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
-      return false;
-    const BasicBlock *Latch = L->getLoopLatch();
-    if (PN->getIncomingValueForBlock(Latch) != BO)
-      return false;
-    if (auto *Step = dyn_cast<Instruction>(BO->getOperand(1)))
-      if (L->contains(Step->getParent()))
-        return false;
     // IV increment may have other users than the IV. We do not want to make
     // dominance queries to analyze the legality of moving it towards the cmp,
     // so just check that there are no other users.
@@ -1305,9 +1312,9 @@
     // cheap check because no CFG changes & dom tree recomputation happens
     // during the transform.
     Function *F = BO->getParent()->getParent();
-    return getDT(*F).dominates(Cmp->getParent(), Latch);
+    return getDT(*F).dominates(Cmp->getParent(), L->getLoopLatch());
   };
-  if (BO->getParent() != Cmp->getParent() && !isIVIncrement(BO)) {
+  if (BO->getParent() != Cmp->getParent() && !isSimpleIVIncrement(BO)) {
     // We used to use a dominator tree here to allow multi-block optimization.
     // But that was problematic because:
     // 1. It could cause a perf regression by hoisting the math op into the
@@ -3038,6 +3045,8 @@
   const TargetLowering &TLI;
   const TargetRegisterInfo &TRI;
   const DataLayout &DL;
+  const LoopInfo &LI;
+  const DominatorTree &DT;
 
   /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
   /// the memory instruction that we're computing this address for.
@@ -3073,16 +3082,17 @@
 
   AddressingModeMatcher(
       SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
-      const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
+      const TargetRegisterInfo &TRI, const LoopInfo &LI,
+      const DominatorTree &DT, Type *AT, unsigned AS, Instruction *MI,
       ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
       InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
       std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
       bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
       : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
-        DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
-        MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
-        PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP),
-        OptSize(OptSize), PSI(PSI), BFI(BFI) {
+        DL(MI->getModule()->getDataLayout()), LI(LI), DT(DT), AccessTy(AT),
+        AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
+        InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
+        LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
     IgnoreProfitability = false;
   }
 
@@ -3097,18 +3107,17 @@
   static ExtAddrMode
   Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
         SmallVectorImpl<Instruction *> &AddrModeInsts,
-        const TargetLowering &TLI, const TargetRegisterInfo &TRI,
-        const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
-        TypePromotionTransaction &TPT,
+        const TargetLowering &TLI, const LoopInfo &LI, const DominatorTree &DT,
+        const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
+        InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
         std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
         bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
     ExtAddrMode Result;
 
-    bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
-                                         MemoryInst, Result, InsertedInsts,
-                                         PromotedInsts, TPT, LargeOffsetGEP,
-                                         OptSize, PSI, BFI)
-                       .matchAddr(V, 0);
+    bool Success = AddressingModeMatcher(
+        AddrModeInsts, TLI, TRI, LI, DT, AccessTy, AS, MemoryInst, Result,
+        InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
+        BFI).matchAddr(V, 0);
     (void)Success;
     assert(Success && "Couldn't select *anything*?");
     return Result;
   }
@@ -3806,9 +3815,10 @@
   // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
   // X*Scale + C*Scale to addr mode.
   ConstantInt *CI = nullptr;
   Value *AddLHS = nullptr;
-  if (isa<Instruction>(ScaleReg) && // not a constant expr.
+  if (isa<Instruction>(ScaleReg) && // not a constant expr.
       match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
-      CI->getValue().isSignedIntN(64)) {
+      CI->getValue().isSignedIntN(64) &&
+      !isIVIncrement(cast<BinaryOperator>(ScaleReg), &LI)) {
     TestAddrMode.InBounds = false;
     TestAddrMode.ScaledReg = AddLHS;
     TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
@@ -3820,9 +3830,72 @@
       AddrMode = TestAddrMode;
       return true;
     }
+    // Restore status quo.
+    TestAddrMode = AddrMode;
   }
 
-  // Otherwise, not (x+c)*scale, just return what we have.
+  auto GetIVStep = [this](const Value *V)
+      -> Optional<std::pair<Instruction *, APInt>> {
+    auto *PN = dyn_cast<PHINode>(V);
+    if (!PN)
+      return None;
+    const Loop *L = LI.getLoopFor(PN->getParent());
+    if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
+      return None;
+    auto *IVInc =
+        dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
+    if (!IVInc || !L->contains(IVInc->getParent()))
+      return None;
+    ConstantInt *Step;
+    if (match(IVInc,
+              m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
+                  m_Specific(PN), m_ConstantInt(Step)))))
+      return std::make_pair(IVInc, -Step->getValue());
+    if (match(IVInc,
+              m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
+                  m_Specific(PN), m_ConstantInt(Step)))))
+      return std::make_pair(IVInc, Step->getValue());
+    if (match(IVInc, m_Sub(m_Specific(PN), m_ConstantInt(Step))))
+      return std::make_pair(IVInc, -Step->getValue());
+    if (match(IVInc, m_Add(m_Specific(PN), m_ConstantInt(Step))))
+      return std::make_pair(IVInc, Step->getValue());
+    return None;
+  };
+
+  // Try to account for the following special case:
+  // 1. ScaleReg is an induction variable;
+  // 2. We use it with a non-zero offset;
+  // 3. The IV's increment is available at the point of the memory instruction.
+  //
+  // In this case, we may reuse the IV increment instead of the IV Phi to
+  // achieve the following advantages:
+  // 1. If the IV step matches the offset, we will not need the offset;
+  // 2. Even if they don't match, we will reduce the overlap of the live
+  //    ranges of the IV and the IV increment, which can lead to better
+  //    register assignment.
+  if (AddrMode.BaseOffs) {
+    if (auto IVStep = GetIVStep(ScaleReg)) {
+      Instruction *IVInc = IVStep->first;
+      APInt Step = IVStep->second;
+      if (DT.dominates(IVInc, MemoryInst)) {
+        TestAddrMode.InBounds = false;
+        TestAddrMode.ScaledReg = IVInc;
+        TestAddrMode.BaseOffs -= Step.getLimitedValue() * AddrMode.Scale;
+        // If this addressing mode is legal, commit it and remember that we
+        // folded this instruction.
+        if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
+          AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+          AddrMode = TestAddrMode;
+          return true;
+        }
+        // Restore status quo.
+        TestAddrMode = AddrMode;
+      }
+    }
+  }
+
+  // Otherwise, just return what we have.
   return true;
 }
 
@@ -4912,9 +4985,10 @@
                                                                     0);
   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
       TPT.getRestorationPoint();
-  AddressingModeMatcher Matcher(
-      MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
-      InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI);
+  AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, DT,
+                                AddressAccessTy, AS, MemoryInst, Result,
+                                InsertedInsts, PromotedInsts, TPT,
+                                LargeOffsetGEP, OptSize, PSI, BFI);
   Matcher.IgnoreProfitability = true;
   bool Success = Matcher.matchAddr(Address, 0);
   (void)Success;
   assert(Success && "Couldn't select *anything*?");
@@ -5017,9 +5091,10 @@
     AddrModeInsts.clear();
     std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
                                                                       0);
+    Function *F = MemoryInst->getParent()->getParent();
     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
-        V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
-        InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
+        V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI,
+        getDT(*F), *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP,
+        OptSize, PSI,
         BFI.get());
 
     GetElementPtrInst *GEP = LargeOffsetGEP.first;
Index: llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
===================================================================
--- llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
+++ llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
@@ -5,16 +5,14 @@
 define i32 @test_01(i32* %p, i64 %len, i32 %x) {
 ; CHECK-LABEL: test_01:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_1: ## %loop
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    subq $1, %rax
+; CHECK-NEXT:    subq $1, %rsi
 ; CHECK-NEXT:    jb LBB0_4
 ; CHECK-NEXT:  ## %bb.2: ## %backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    cmpl %edx, -4(%rdi,%rsi,4)
-; CHECK-NEXT:    movq %rax, %rsi
+; CHECK-NEXT:    cmpl %edx, (%rdi,%rsi,4)
 ; CHECK-NEXT:    jne LBB0_1
 ; CHECK-NEXT:  ## %bb.3: ## %failure
 ; CHECK-NEXT:    ud2
@@ -47,16 +45,14 @@
 define i32 @test_01a(i32* %p, i64 %len, i32 %x) {
 ; CHECK-LABEL: test_01a:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB1_1: ## %loop
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    subq $1, %rax
+; CHECK-NEXT:    subq $1, %rsi
 ; CHECK-NEXT:    jb LBB1_4
 ; CHECK-NEXT:  ## %bb.2: ## %backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT:    cmpl %edx, -28(%rdi,%rsi,4)
-; CHECK-NEXT:    movq %rax, %rsi
+; CHECK-NEXT:    cmpl %edx, -24(%rdi,%rsi,4)
 ; CHECK-NEXT:    jne LBB1_1
 ; CHECK-NEXT:  ## %bb.3: ## %failure
 ; CHECK-NEXT:    ud2
@@ -89,16 +85,14 @@
 define i32 @test_02(i32* %p, i64 %len, i32 %x) {
 ; CHECK-LABEL: test_02:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB2_1: ## %loop
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    subq $1, %rax
+; CHECK-NEXT:    subq $1, %rsi
 ; CHECK-NEXT:    jb LBB2_4
 ; CHECK-NEXT:  ## %bb.2: ## %backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB2_1 Depth=1
-; CHECK-NEXT:    cmpl %edx, -4(%rdi,%rsi,4)
-; CHECK-NEXT:    movq %rax, %rsi
+; CHECK-NEXT:    cmpl %edx, (%rdi,%rsi,4)
 ; CHECK-NEXT:    jne LBB2_1
 ; CHECK-NEXT:  ## %bb.3: ## %failure
 ; CHECK-NEXT:    ud2
@@ -133,16 +127,14 @@
 define i32 @test_03(i32* %p, i64 %len, i32 %x) {
 ; CHECK-LABEL: test_03:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB3_1: ## %loop
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    subq $1, %rax
+; CHECK-NEXT:    subq $1, %rsi
 ; CHECK-NEXT:    jb LBB3_4
 ; CHECK-NEXT:  ## %bb.2: ## %backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB3_1 Depth=1
-; CHECK-NEXT:    cmpl %edx, -4(%rdi,%rsi,4)
-; CHECK-NEXT:    movq %rax, %rsi
+; CHECK-NEXT:    cmpl %edx, (%rdi,%rsi,4)
 ; CHECK-NEXT:    jne LBB3_1
 ; CHECK-NEXT:  ## %bb.3: ## %failure
 ; CHECK-NEXT:    ud2
Index: llvm/test/CodeGen/X86/uadd_inc_iv.ll
===================================================================
--- llvm/test/CodeGen/X86/uadd_inc_iv.ll
+++ llvm/test/CodeGen/X86/uadd_inc_iv.ll
@@ -14,11 +14,10 @@
 ; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
 ; CHECK-NEXT:    br i1 [[OV]], label [[EXIT:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
-; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
+; CHECK-NEXT:    [[SUNKADDR3:%.*]] = mul i64 [[MATH]], 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to i8*
-; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[SUNKADDR]]
-; CHECK-NEXT:    [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
+; CHECK-NEXT:    [[SUNKADDR4:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[SUNKADDR3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SUNKADDR4]] to i32*
 ; CHECK-NEXT:    [[LOADED:%.*]] = load atomic i32, i32* [[TMP2]] unordered, align 4
 ; CHECK-NEXT:    [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
 ; CHECK-NEXT:    br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
Index: llvm/test/CodeGen/X86/usub_inc_iv.ll
===================================================================
--- llvm/test/CodeGen/X86/usub_inc_iv.ll
+++ llvm/test/CodeGen/X86/usub_inc_iv.ll
@@ -12,11 +12,10 @@
 ; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
 ; CHECK-NEXT:    br i1 [[OV]], label [[EXIT:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
-; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[MATH]], 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to i8*
 ; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[SUNKADDR]]
-; CHECK-NEXT:    [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SUNKADDR1]] to i32*
 ; CHECK-NEXT:    [[LOADED:%.*]] = load atomic i32, i32* [[TMP2]] unordered, align 4
 ; CHECK-NEXT:    [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
 ; CHECK-NEXT:    br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
@@ -60,10 +59,10 @@
 ; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
 ; CHECK-NEXT:    br i1 [[OV]], label [[EXIT:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
-; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[MATH]], 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to i8*
 ; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[SUNKADDR]]
-; CHECK-NEXT:    [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -28
+; CHECK-NEXT:    [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -24
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
 ; CHECK-NEXT:    [[LOADED:%.*]] = load atomic i32, i32* [[TMP2]] unordered, align 4
 ; CHECK-NEXT:    [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
@@ -108,11 +107,10 @@
 ; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
 ; CHECK-NEXT:    br i1 [[OV]], label [[EXIT:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
-; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[MATH]], 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to i8*
 ; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[SUNKADDR]]
-; CHECK-NEXT:    [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SUNKADDR1]] to i32*
 ; CHECK-NEXT:    [[LOADED:%.*]] = load atomic i32, i32* [[TMP2]] unordered, align 4
 ; CHECK-NEXT:    [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
 ; CHECK-NEXT:    br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
@@ -161,11 +159,10 @@
 ; CHECK-NEXT:    [[COND_1:%.*]] = icmp eq i64 [[IV]], 0
 ; CHECK-NEXT:    br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
-; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[IV_NEXT]], 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to i8*
 ; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[SUNKADDR]]
-; CHECK-NEXT:    [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -4
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SUNKADDR1]] to i32*
 ; CHECK-NEXT:    [[LOADED:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
 ; CHECK-NEXT:    [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
 ; CHECK-NEXT:    br i1 [[COND_2]], label [[FAILURE]], label [[LOOP]]
@@ -272,11 +269,10 @@
 ; CHECK-NEXT:    [[COND_1:%.*]] = icmp eq i64 [[IV]], 0
 ; CHECK-NEXT:    br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
-; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[IV_NEXT]], 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to i8*
 ; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[SUNKADDR]]
-; CHECK-NEXT:    [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -4
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SUNKADDR1]] to i32*
 ; CHECK-NEXT:    [[LOADED:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
 ; CHECK-NEXT:    [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
 ; CHECK-NEXT:    br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
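
Illustration of the matchScaledValue change, reduced from the usub_inc_iv.ll
checks above. This is a minimal sketch in LLVM IR; the value names (%p8,
%scaled, %sunk, %addr) are hypothetical and not taken from the patch:

  ; The loop decrements %iv by 1 (%math = %iv - 1, via usub.with.overflow)
  ; and loads p[%iv - 1] on the backedge.
  ;
  ; Before: the sunk address is built from the IV Phi plus a separate -4
  ; displacement, keeping %iv live across the compare:
  ;   %scaled = mul i64 %iv, 4
  ;   %sunk   = getelementptr i8, i8* %p8, i64 %scaled
  ;   %addr   = getelementptr i8, i8* %sunk, i64 -4
  ;
  ; After: the IV increment %math is folded instead; its step (-1) times the
  ; scale (4) absorbs the -4 displacement, so one GEP disappears and the live
  ; ranges of %iv and %math no longer overlap across the load:
  ;   %scaled = mul i64 %math, 4
  ;   %addr   = getelementptr i8, i8* %p8, i64 %scaled

When the step does not match the offset exactly (as in test_01a, -28 becoming
-24), the displacement merely shrinks by step * scale; the transform is still
committed only if TLI.isLegalAddressingMode accepts the adjusted mode, and
TestAddrMode is restored otherwise.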