Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -612,6 +612,10 @@
   /// with instruction generated for signed comparison.
   virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }
 
+  /// Return true if the heuristic to prefer icmp eq zero should be used in code
+  /// gen prepare.
+  virtual bool preferZeroCompareBranch() const { return false; }
+
   /// Return true if it is safe to transform an integer-domain bitwise operation
   /// into the equivalent floating-point operation. This should be set to true
   /// if the target has IEEE-754-compliant fabs/fneg operations for the input
Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7688,6 +7688,76 @@
   return true;
 }
 
+static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI) {
+  // Try and convert
+  //  %c = icmp ult %x, 8
+  //  br %c, bla, blb
+  //  %tc = lshr %x, 3
+  // to
+  //  %tc = lshr %x, 3
+  //  %c = icmp eq %tc, 0
+  //  br %c, bla, blb
+  // Creating the cmp to zero can be better for the backend, especially if the
+  // lshr produces flags that can be used automatically.
+  // The same applies to an equality compare against C when an add of -C or a
+  // sub of C on the same value is nearby.
+  if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
+    return false;
+
+  ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
+  if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
+    return false;
+
+  Value *X = Cmp->getOperand(0);
+  // Keep the constant at its own bit width. Narrowing it to a host integer
+  // would make e.g. an i64 0x100000008 look like the power of two 8, and
+  // would negate it at the wrong width for the add match below.
+  const APInt &CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
+
+  for (auto *U : X->users()) {
+    Instruction *UI = dyn_cast<Instruction>(U);
+    if (!UI)
+      continue;
+
+    // A quick dominance check. UI must already be in the branch's block, or
+    // in a successor whose only predecessor is that block; otherwise hoisting
+    // UI above the branch could leave some of its users undominated.
+    BasicBlock *UIBB = UI->getParent();
+    if (UIBB != Branch->getParent() &&
+        ((UIBB != Branch->getSuccessor(0) &&
+          UIBB != Branch->getSuccessor(1)) ||
+         !UIBB->getSinglePredecessor()))
+      continue;
+
+    // Pick the predicate for the compare against zero, if UI qualifies.
+    CmpInst::Predicate ZeroPred;
+    if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
+        match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
+      // x u< 2^k  <=>  (x >> k) == 0
+      ZeroPred = ICmpInst::ICMP_EQ;
+    } else if (Cmp->isEquality() &&
+               (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
+                match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) {
+      // x ==/!= C  <=>  (x - C) ==/!= 0
+      ZeroPred = Cmp->getPredicate();
+    } else {
+      continue;
+    }
+
+    IRBuilder<> Builder(Branch);
+    if (UIBB != Branch->getParent())
+      UI->moveBefore(Branch);
+    // The branch now depends on UI on every path, so flags such as
+    // nuw/nsw/exact must not be allowed to turn its result into poison.
+    UI->dropPoisonGeneratingFlags();
+    Value *NewCmp =
+        Builder.CreateCmp(ZeroPred, UI, ConstantInt::get(UI->getType(), 0));
+    Cmp->replaceAllUsesWith(NewCmp);
+    return true;
+  }
+  return false;
+}
+
 bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
   // Bail out if we inserted the instruction to prevent optimizations from
   // stepping on each other's toes.
@@ -7849,6 +7919,8 @@
     return optimizeSwitchInst(cast<SwitchInst>(I));
   case Instruction::ExtractElement:
     return optimizeExtractElementInst(cast<ExtractElementInst>(I));
+  case Instruction::Br:
+    return optimizeBranch(cast<BranchInst>(I), *TLI);
   }
 
   return false;
Index: llvm/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.h
+++ llvm/lib/Target/ARM/ARMISelLowering.h
@@ -563,6 +563,8 @@
 
     Sched::Preference getSchedulingPreference(SDNode *N) const override;
 
+    bool preferZeroCompareBranch() const override { return true; }
+
     bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
Index: llvm/test/CodeGen/ARM/branch-on-zero.ll
===================================================================
--- llvm/test/CodeGen/ARM/branch-on-zero.ll
+++ llvm/test/CodeGen/ARM/branch-on-zero.ll
@@ -96,29 +96,26 @@
 define i32 @test_lshr2(i32* nocapture %x, i32* nocapture readonly %y, i32 %n) {
 ; CHECK-V6M-LABEL: test_lshr2:
 ; CHECK-V6M: @ %bb.0: @ %entry
-; CHECK-V6M-NEXT: cmp r2, #4
-; CHECK-V6M-NEXT: blo .LBB1_3
-; CHECK-V6M-NEXT: @ %bb.1: @ %while.body.preheader
 ; CHECK-V6M-NEXT: lsrs r2, r2, #2
-; CHECK-V6M-NEXT: .LBB1_2: @ %while.body
+; CHECK-V6M-NEXT: beq .LBB1_2
+; CHECK-V6M-NEXT: .LBB1_1: @ %while.body
 ; CHECK-V6M-NEXT: @ =>This Inner Loop Header: Depth=1
 ; CHECK-V6M-NEXT: ldm r1!, {r3}
 ; CHECK-V6M-NEXT: lsls r3, r3, #1
 ; CHECK-V6M-NEXT: stm r0!, {r3}
 ; CHECK-V6M-NEXT: subs r2, r2, #1
-; CHECK-V6M-NEXT: bne .LBB1_2
-; CHECK-V6M-NEXT: .LBB1_3: @ %while.end
+; CHECK-V6M-NEXT: bne .LBB1_1
+; CHECK-V6M-NEXT: .LBB1_2: @ %while.end
 ; CHECK-V6M-NEXT: movs r0, #0
 ; CHECK-V6M-NEXT: bx lr
 ;
 ; CHECK-V7M-LABEL: test_lshr2:
 ; CHECK-V7M: @ %bb.0: @ %entry
-; CHECK-V7M-NEXT: cmp r2, #4
-; CHECK-V7M-NEXT: blo .LBB1_3
+; CHECK-V7M-NEXT: lsrs r2, r2, #2
+; CHECK-V7M-NEXT: beq .LBB1_3
 ; CHECK-V7M-NEXT: @ %bb.1: @ %while.body.preheader
 ; CHECK-V7M-NEXT: subs r1, #4
 ;
CHECK-V7M-NEXT: subs r0, #4 -; CHECK-V7M-NEXT: lsrs r2, r2, #2 ; CHECK-V7M-NEXT: .LBB1_2: @ %while.body ; CHECK-V7M-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-V7M-NEXT: ldr r3, [r1, #4]! @@ -134,24 +131,23 @@ ; CHECK-V81M: @ %bb.0: @ %entry ; CHECK-V81M-NEXT: .save {r7, lr} ; CHECK-V81M-NEXT: push {r7, lr} -; CHECK-V81M-NEXT: cmp r2, #4 -; CHECK-V81M-NEXT: blo .LBB1_3 -; CHECK-V81M-NEXT: @ %bb.1: @ %while.body.preheader -; CHECK-V81M-NEXT: lsr.w lr, r2, #2 -; CHECK-V81M-NEXT: .LBB1_2: @ %while.body +; CHECK-V81M-NEXT: lsrs r2, r2, #2 +; CHECK-V81M-NEXT: wls lr, r2, .LBB1_2 +; CHECK-V81M-NEXT: .LBB1_1: @ %while.body ; CHECK-V81M-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-V81M-NEXT: ldr r2, [r1], #4 ; CHECK-V81M-NEXT: lsls r2, r2, #1 ; CHECK-V81M-NEXT: str r2, [r0], #4 -; CHECK-V81M-NEXT: le lr, .LBB1_2 -; CHECK-V81M-NEXT: .LBB1_3: @ %while.end +; CHECK-V81M-NEXT: le lr, .LBB1_1 +; CHECK-V81M-NEXT: .LBB1_2: @ %while.end ; CHECK-V81M-NEXT: movs r0, #0 ; CHECK-V81M-NEXT: pop {r7, pc} ; ; CHECK-V7A-LABEL: test_lshr2: ; CHECK-V7A: @ %bb.0: @ %entry -; CHECK-V7A-NEXT: cmp r2, #4 -; CHECK-V7A-NEXT: blo .LBB1_3 +; CHECK-V7A-NEXT: mov r3, #0 +; CHECK-V7A-NEXT: cmp r3, r2, lsr #2 +; CHECK-V7A-NEXT: beq .LBB1_3 ; CHECK-V7A-NEXT: @ %bb.1: @ %while.body.preheader ; CHECK-V7A-NEXT: lsr r2, r2, #2 ; CHECK-V7A-NEXT: .LBB1_2: @ %while.body Index: llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll =================================================================== --- llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll +++ llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll @@ -7,18 +7,16 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: cmp r0, #1 +; CHECK-NEXT: subs r4, r0, #1 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} -; CHECK-NEXT: .LBB0_1: @ %bb.nph -; CHECK-NEXT: subs r4, r0, #1 -; CHECK-NEXT: .LBB0_2: @ %bb +; CHECK-NEXT: .LBB0_1: @ %bb ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: bl f ; 
CHECK-NEXT: bl g ; CHECK-NEXT: subs r4, #1 -; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: @ %bb.3: @ %return +; CHECK-NEXT: bne .LBB0_1 +; CHECK-NEXT: @ %bb.2: @ %return ; CHECK-NEXT: pop {r4, pc} entry: %0 = icmp eq i32 %n, 1 ; [#uses=1] @@ -50,26 +48,22 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: cmp r0, #1 +; CHECK-NEXT: subs r4, r0, #1 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} -; CHECK-NEXT: .LBB1_1: @ %bb.nph -; CHECK-NEXT: subs r4, r0, #1 -; CHECK-NEXT: b .LBB1_3 -; CHECK-NEXT: .LBB1_2: @ %bb2 -; CHECK-NEXT: @ in Loop: Header=BB1_3 Depth=1 -; CHECK-NEXT: subs r4, #1 -; CHECK-NEXT: beq .LBB1_5 -; CHECK-NEXT: .LBB1_3: @ %bb +; CHECK-NEXT: .LBB1_1: @ %bb ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmp r4, #2 -; CHECK-NEXT: blt .LBB1_2 -; CHECK-NEXT: @ %bb.4: @ %bb1 -; CHECK-NEXT: @ in Loop: Header=BB1_3 Depth=1 +; CHECK-NEXT: blt .LBB1_3 +; CHECK-NEXT: @ %bb.2: @ %bb1 +; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: bl f ; CHECK-NEXT: bl g -; CHECK-NEXT: b .LBB1_2 -; CHECK-NEXT: .LBB1_5: @ %return +; CHECK-NEXT: .LBB1_3: @ %bb2 +; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: subs r4, #1 +; CHECK-NEXT: bne .LBB1_1 +; CHECK-NEXT: @ %bb.4: @ %return ; CHECK-NEXT: pop {r4, pc} entry: %0 = icmp eq i32 %n, 1 ; [#uses=1] Index: llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll =================================================================== --- llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll +++ llvm/test/Transforms/CodeGenPrepare/ARM/branch-on-zero.ll @@ -7,12 +7,12 @@ define i32 @lshr3_then(i32 %a) { ; CHECK-LABEL: @lshr3_then( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A:%.*]], 8 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = lshr i32 [[A:%.*]], 3 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: 
then: ; CHECK-NEXT: ret i32 0 ; CHECK: else: -; CHECK-NEXT: [[L:%.*]] = lshr i32 [[A]], 3 ; CHECK-NEXT: ret i32 [[L]] ; entry: @@ -30,10 +30,10 @@ define i32 @lshr5_else(i32 %a) { ; CHECK-LABEL: @lshr5_else( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A:%.*]], 32 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = lshr i32 [[A:%.*]], 5 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: [[L:%.*]] = lshr i32 [[A]], 5 ; CHECK-NEXT: ret i32 [[L]] ; CHECK: else: ; CHECK-NEXT: ret i32 0 @@ -54,8 +54,8 @@ ; CHECK-LABEL: @lshr2_entry( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[L:%.*]] = lshr i32 [[A:%.*]], 1 -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], 2 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i32 [[L]] ; CHECK: else: @@ -99,10 +99,10 @@ define i32 @ashr5_else(i32 %a) { ; CHECK-LABEL: @ashr5_else( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A:%.*]], 32 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = ashr i32 [[A:%.*]], 5 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: [[L:%.*]] = ashr i32 [[A]], 5 ; CHECK-NEXT: ret i32 [[L]] ; CHECK: else: ; CHECK-NEXT: ret i32 0 @@ -145,10 +145,10 @@ define i32 @addm10_then(i32 %a) { ; CHECK-LABEL: @addm10_then( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[A:%.*]], 10 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = add i32 [[A:%.*]], -10 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; 
CHECK-NEXT: [[L:%.*]] = add i32 [[A]], -10 ; CHECK-NEXT: ret i32 [[L]] ; CHECK: else: ; CHECK-NEXT: ret i32 0 @@ -191,12 +191,12 @@ define i32 @sub10_else(i32 %a) { ; CHECK-LABEL: @sub10_else( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[A:%.*]], 10 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = sub i32 [[A:%.*]], 10 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i32 0 ; CHECK: else: -; CHECK-NEXT: [[L:%.*]] = sub i32 [[A]], 10 ; CHECK-NEXT: ret i32 [[L]] ; entry: @@ -214,10 +214,10 @@ define i32 @subm10_then(i32 %a) { ; CHECK-LABEL: @subm10_then( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[A:%.*]], -10 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[L:%.*]] = sub i32 [[A:%.*]], -10 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[L]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: [[L:%.*]] = sub i32 [[A]], -10 ; CHECK-NEXT: ret i32 [[L]] ; CHECK: else: ; CHECK-NEXT: ret i32 0