diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -613,6 +613,11 @@
   /// with instruction generated for signed comparison.
   virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }
 
+  /// Return true if CodeGenPrepare should use its heuristic of rewriting a
+  /// branch condition as an icmp against zero of an existing shift, add or sub
+  /// of the compared value. Targets opt in by overriding this to return true.
+  virtual bool preferZeroCompareBranch() const { return false; }
+
   /// Return true if it is safe to transform an integer-domain bitwise operation
   /// into the equivalent floating-point operation. This should be set to true
   /// if the target has IEEE-754-compliant fabs/fneg operations for the input
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7688,6 +7688,77 @@
   return true;
 }
 
+/// Try to rewrite the compare feeding a conditional branch as a compare against
+/// zero of an existing shift, add or sub of the same value, converting
+///   %c = icmp ult %x, 8
+///   br %c, bla, blb
+///   %tc = lshr %x, 3
+/// to
+///   %tc = lshr %x, 3
+///   %c = icmp eq %tc, 0
+///   br %c, bla, blb
+/// Creating the cmp to zero can be better for the backend, especially if the
+/// lshr produces flags that can be used automatically.
+///
+/// Does nothing unless \p TLI opts in through preferZeroCompareBranch().
+/// \returns true if the condition of \p Branch was replaced.
+static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI) {
+  if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
+    return false;
+
+  ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
+  if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
+    return false;
+
+  Value *X = Cmp->getOperand(0);
+  APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
+
+  // Make UI available at the branch and branch on `icmp Pred UI, 0` instead.
+  auto ReplaceWithZeroCompare = [&](Instruction *UI, CmpInst::Predicate Pred) {
+    IRBuilder<> Builder(Branch);
+    if (UI->getParent() != Branch->getParent())
+      UI->moveBefore(Branch);
+    // UI now feeds the branch condition on every path, including ones where
+    // its result used to be ignored. Flags such as exact/nuw/nsw could turn
+    // it into poison there, so drop them.
+    UI->dropPoisonGeneratingFlags();
+    Value *NewCmp =
+        Builder.CreateCmp(Pred, UI, ConstantInt::get(UI->getType(), 0));
+    LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
+    LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
+    Cmp->replaceAllUsesWith(NewCmp);
+  };
+
+  for (auto *U : X->users()) {
+    Instruction *UI = dyn_cast<Instruction>(U);
+    // A quick dominance check: only accept users in the branch's own block or
+    // in one of its successors that has a single predecessor.
+    if (!UI ||
+        (UI->getParent() != Branch->getParent() &&
+         UI->getParent() != Branch->getSuccessor(0) &&
+         UI->getParent() != Branch->getSuccessor(1)) ||
+        (UI->getParent() != Branch->getParent() &&
+         !UI->getParent()->getSinglePredecessor()))
+      continue;
+
+    // x u< 2^k is the same as (x >> k) == 0.
+    if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
+        match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
+      ReplaceWithZeroCompare(UI, ICmpInst::ICMP_EQ);
+      return true;
+    }
+    // x ==/!= C is the same as (x - C) ==/!= 0, with the subtraction spelled
+    // as either `add x, -C` or `sub x, C`.
+    if (Cmp->isEquality() &&
+        (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
+         match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) {
+      ReplaceWithZeroCompare(UI, Cmp->getPredicate());
+      return true;
+    }
+  }
+  return false;
+}
+
 bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
   // Bail out if we inserted the instruction to prevent optimizations from
   // stepping on each other's toes.
@@ -7849,6 +7910,8 @@
     return optimizeSwitchInst(cast<SwitchInst>(I));
   case Instruction::ExtractElement:
     return optimizeExtractElementInst(cast<ExtractElementInst>(I));
+  case Instruction::Br:
+    return optimizeBranch(cast<BranchInst>(I), *TLI);
   }
 
   return false;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -573,6 +573,9 @@
 
     Sched::Preference getSchedulingPreference(SDNode *N) const override;
 
+    /// Opt in to the CodeGenPrepare heuristic that prefers icmp eq zero.
+    bool preferZeroCompareBranch() const override { return true; }
+
     bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
diff --git a/llvm/test/CodeGen/ARM/branch-on-zero.ll b/llvm/test/CodeGen/ARM/branch-on-zero.ll
--- a/llvm/test/CodeGen/ARM/branch-on-zero.ll
+++ b/llvm/test/CodeGen/ARM/branch-on-zero.ll
@@ -96,29 +96,26 @@
 define i32 @test_lshr2(i32* nocapture %x, i32* nocapture readonly %y, i32 %n) {
 ; CHECK-V6M-LABEL: test_lshr2:
 ; CHECK-V6M: @ %bb.0: @ %entry
-; CHECK-V6M-NEXT: cmp r2, #4
-; CHECK-V6M-NEXT: blo .LBB1_3
-; CHECK-V6M-NEXT: @ %bb.1: @ %while.body.preheader
 ; CHECK-V6M-NEXT: lsrs r2, r2, #2
-; CHECK-V6M-NEXT: .LBB1_2: @ %while.body
+; CHECK-V6M-NEXT: beq .LBB1_2
+; CHECK-V6M-NEXT: .LBB1_1: @ %while.body
 ; CHECK-V6M-NEXT: @ =>This Inner Loop Header: Depth=1
 ; CHECK-V6M-NEXT: ldm r1!, {r3}
 ; CHECK-V6M-NEXT: lsls r3, r3, #1
 ; CHECK-V6M-NEXT: stm r0!, {r3}
 ; CHECK-V6M-NEXT: subs r2, r2, #1
-; CHECK-V6M-NEXT: bne .LBB1_2
-; CHECK-V6M-NEXT: .LBB1_3: @ %while.end
+; CHECK-V6M-NEXT: bne .LBB1_1
+; CHECK-V6M-NEXT: .LBB1_2: @ %while.end
 ; CHECK-V6M-NEXT: movs r0, #0
 ; CHECK-V6M-NEXT: bx lr
 ;
 ; CHECK-V7M-LABEL: test_lshr2:
 ; CHECK-V7M: @ %bb.0: @ %entry
-; CHECK-V7M-NEXT: cmp r2, #4
-; CHECK-V7M-NEXT: blo .LBB1_3
+; CHECK-V7M-NEXT: lsrs r2, r2, #2
+; CHECK-V7M-NEXT: beq .LBB1_3
 ; CHECK-V7M-NEXT: @ %bb.1: @ %while.body.preheader
 ; CHECK-V7M-NEXT: subs r1, #4
 ; CHECK-V7M-NEXT: subs r0, #4
-; CHECK-V7M-NEXT:
lsrs r2, r2, #2 ; CHECK-V7M-NEXT: .LBB1_2: @ %while.body ; CHECK-V7M-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-V7M-NEXT: ldr r3, [r1, #4]! @@ -134,24 +131,23 @@ ; CHECK-V81M: @ %bb.0: @ %entry ; CHECK-V81M-NEXT: .save {r7, lr} ; CHECK-V81M-NEXT: push {r7, lr} -; CHECK-V81M-NEXT: cmp r2, #4 -; CHECK-V81M-NEXT: blo .LBB1_3 -; CHECK-V81M-NEXT: @ %bb.1: @ %while.body.preheader -; CHECK-V81M-NEXT: lsr.w lr, r2, #2 -; CHECK-V81M-NEXT: .LBB1_2: @ %while.body +; CHECK-V81M-NEXT: lsrs r2, r2, #2 +; CHECK-V81M-NEXT: wls lr, r2, .LBB1_2 +; CHECK-V81M-NEXT: .LBB1_1: @ %while.body ; CHECK-V81M-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-V81M-NEXT: ldr r2, [r1], #4 ; CHECK-V81M-NEXT: lsls r2, r2, #1 ; CHECK-V81M-NEXT: str r2, [r0], #4 -; CHECK-V81M-NEXT: le lr, .LBB1_2 -; CHECK-V81M-NEXT: .LBB1_3: @ %while.end +; CHECK-V81M-NEXT: le lr, .LBB1_1 +; CHECK-V81M-NEXT: .LBB1_2: @ %while.end ; CHECK-V81M-NEXT: movs r0, #0 ; CHECK-V81M-NEXT: pop {r7, pc} ; ; CHECK-V7A-LABEL: test_lshr2: ; CHECK-V7A: @ %bb.0: @ %entry -; CHECK-V7A-NEXT: cmp r2, #4 -; CHECK-V7A-NEXT: blo .LBB1_3 +; CHECK-V7A-NEXT: mov r3, #0 +; CHECK-V7A-NEXT: cmp r3, r2, lsr #2 +; CHECK-V7A-NEXT: beq .LBB1_3 ; CHECK-V7A-NEXT: @ %bb.1: @ %while.body.preheader ; CHECK-V7A-NEXT: lsr r2, r2, #2 ; CHECK-V7A-NEXT: .LBB1_2: @ %while.body diff --git a/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll b/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll --- a/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll +++ b/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll @@ -7,18 +7,16 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: cmp r0, #1 +; CHECK-NEXT: subs r4, r0, #1 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} -; CHECK-NEXT: .LBB0_1: @ %bb.nph -; CHECK-NEXT: subs r4, r0, #1 -; CHECK-NEXT: .LBB0_2: @ %bb +; CHECK-NEXT: .LBB0_1: @ %bb ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: bl f ; CHECK-NEXT: bl g ; CHECK-NEXT: subs r4, #1 -; CHECK-NEXT: bne 
.LBB0_2 -; CHECK-NEXT: @ %bb.3: @ %return +; CHECK-NEXT: bne .LBB0_1 +; CHECK-NEXT: @ %bb.2: @ %return ; CHECK-NEXT: pop {r4, pc} entry: %0 = icmp eq i32 %n, 1 ; [#uses=1] @@ -50,26 +48,22 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: cmp r0, #1 +; CHECK-NEXT: subs r4, r0, #1 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} -; CHECK-NEXT: .LBB1_1: @ %bb.nph -; CHECK-NEXT: subs r4, r0, #1 -; CHECK-NEXT: b .LBB1_3 -; CHECK-NEXT: .LBB1_2: @ %bb2 -; CHECK-NEXT: @ in Loop: Header=BB1_3 Depth=1 -; CHECK-NEXT: subs r4, #1 -; CHECK-NEXT: beq .LBB1_5 -; CHECK-NEXT: .LBB1_3: @ %bb +; CHECK-NEXT: .LBB1_1: @ %bb ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmp r4, #2 -; CHECK-NEXT: blt .LBB1_2 -; CHECK-NEXT: @ %bb.4: @ %bb1 -; CHECK-NEXT: @ in Loop: Header=BB1_3 Depth=1 +; CHECK-NEXT: blt .LBB1_3 +; CHECK-NEXT: @ %bb.2: @ %bb1 +; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: bl f ; CHECK-NEXT: bl g -; CHECK-NEXT: b .LBB1_2 -; CHECK-NEXT: .LBB1_5: @ %return +; CHECK-NEXT: .LBB1_3: @ %bb2 +; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: subs r4, #1 +; CHECK-NEXT: bne .LBB1_1 +; CHECK-NEXT: @ %bb.4: @ %return ; CHECK-NEXT: pop {r4, pc} entry: %0 = icmp eq i32 %n, 1 ; [#uses=1] diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll --- a/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll @@ -7,12 +7,12 @@ define i32 @lshr3_then(i32 %a) { ; CHECK-LABEL: @lshr3_then( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A:%.*]], 8 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = lshr i32 [[A:%.*]], 3 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i32 0 ; CHECK: else: -; CHECK-NEXT: 
[[L:%.*]] = lshr i32 [[A]], 3 ; CHECK-NEXT: ret i32 [[L]] ; entry: @@ -30,10 +30,10 @@ define i32 @lshr5_else(i32 %a) { ; CHECK-LABEL: @lshr5_else( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A:%.*]], 32 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = lshr i32 [[A:%.*]], 5 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: [[L:%.*]] = lshr i32 [[A]], 5 ; CHECK-NEXT: ret i32 [[L]] ; CHECK: else: ; CHECK-NEXT: ret i32 0 @@ -54,8 +54,8 @@ ; CHECK-LABEL: @lshr2_entry( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[L:%.*]] = lshr i32 [[A:%.*]], 1 -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], 2 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i32 [[L]] ; CHECK: else: @@ -99,10 +99,10 @@ define i32 @ashr5_else(i32 %a) { ; CHECK-LABEL: @ashr5_else( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A:%.*]], 32 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = ashr i32 [[A:%.*]], 5 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: [[L:%.*]] = ashr i32 [[A]], 5 ; CHECK-NEXT: ret i32 [[L]] ; CHECK: else: ; CHECK-NEXT: ret i32 0 @@ -145,10 +145,10 @@ define i32 @addm10_then(i32 %a) { ; CHECK-LABEL: @addm10_then( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[A:%.*]], 10 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = add i32 [[A:%.*]], -10 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: [[L:%.*]] = add i32 [[A]], -10 ; CHECK-NEXT: ret i32 
[[L]] ; CHECK: else: ; CHECK-NEXT: ret i32 0 @@ -191,12 +191,12 @@ define i32 @sub10_else(i32 %a) { ; CHECK-LABEL: @sub10_else( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[A:%.*]], 10 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = sub i32 [[A:%.*]], 10 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i32 0 ; CHECK: else: -; CHECK-NEXT: [[L:%.*]] = sub i32 [[A]], 10 ; CHECK-NEXT: ret i32 [[L]] ; entry: @@ -214,10 +214,10 @@ define i32 @subm10_then(i32 %a) { ; CHECK-LABEL: @subm10_then( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[A:%.*]], -10 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = sub i32 [[A:%.*]], -10 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: [[L:%.*]] = sub i32 [[A]], -10 ; CHECK-NEXT: ret i32 [[L]] ; CHECK: else: ; CHECK-NEXT: ret i32 0 @@ -237,12 +237,12 @@ define i64 @lshr64(i64 %a) { ; CHECK-LABEL: @lshr64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp ult i64 [[A:%.*]], 1099511627776 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = lshr i64 [[A:%.*]], 40 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i64 0 ; CHECK: else: -; CHECK-NEXT: [[L:%.*]] = lshr i64 [[A]], 40 ; CHECK-NEXT: ret i64 [[L]] ; entry: @@ -260,12 +260,12 @@ define i128 @lshr128(i128 %a) { ; CHECK-LABEL: @lshr128( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp ult i128 [[A:%.*]], 36893488147419103232 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = lshr i128 [[A:%.*]], 65 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i128 [[L]], 0 +; CHECK-NEXT: br 
i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i128 0 ; CHECK: else: -; CHECK-NEXT: [[L:%.*]] = lshr i128 [[A]], 65 ; CHECK-NEXT: ret i128 [[L]] ; entry: