Index: lib/Transforms/Utils/BypassSlowDivision.cpp =================================================================== --- lib/Transforms/Utils/BypassSlowDivision.cpp +++ lib/Transforms/Utils/BypassSlowDivision.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -43,6 +44,8 @@ DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) : Quotient(InQuotient), Remainder(InRemainder) {} }; + + enum ValRng { VALRNG_SHORT, VALRNG_UNKNOWN, VALRNG_LONG }; } namespace llvm { @@ -72,6 +75,31 @@ typedef DenseMap DivCacheTy; } +/// \brief Check if the value may fit the shorter type. +/// +/// The function tests if the operand is known to fit the shorter type or if it +/// is known to be too wide. In the former case the runtime check can be +/// simplified, in the latter case bypassing should be avoided. +static ValRng getValueRange(Value *Op, IntegerType *BypassType, + const DataLayout &DL) { + unsigned shortLen = BypassType->getBitWidth(); + unsigned longLen = cast(Op->getType())->getBitWidth(); + unsigned nHiBits = longLen - shortLen; + APInt Zeros(longLen, 0), Ones(longLen, 0); + + computeKnownBits(Op, Zeros, Ones, DL); + + if (Ones.countLeadingZeros() < nHiBits) { + return VALRNG_LONG; + } + + if (Zeros.countLeadingOnes() >= nHiBits) { + return VALRNG_SHORT; + } + + return VALRNG_UNKNOWN; +} + // insertFastDiv - Substitutes the div/rem instruction with code that checks the // value of the operands and uses a shorter-faster div/rem instruction when // possible and the longer-slower div/rem instruction otherwise. @@ -79,45 +107,81 @@ bool UseDivOp, bool UseSignedOp, DivCacheTy &PerBBDivCache) { Function *F = I->getParent()->getParent(); + const DataLayout &DL = I->getModule()->getDataLayout(); + // Get instruction operands Value *Dividend = I->getOperand(0); Value *Divisor = I->getOperand(1); if (isa(Divisor)) { - // Division by a constant should have been been solved and replaced earlier - // in the pipeline. + // Keep division by a constant for DAGCombiner. return false; } - // If the numerator is a constant, bail if it doesn't fit into BypassType. - if (ConstantInt *ConstDividend = dyn_cast(Dividend)) - if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth()) - return false; + ValRng DividendRng = getValueRange(Dividend, BypassType, DL); + if (DividendRng == VALRNG_LONG) + return false; + + ValRng DivisorRng = getValueRange(Divisor, BypassType, DL); + if (DivisorRng == VALRNG_LONG) + return false; + + bool DividendShort = (DividendRng == VALRNG_SHORT); + bool DivisorShort = (DivisorRng == VALRNG_SHORT); + + // If both operands are known to be short then just replace the long division + // with a short one. + if (DividendShort && DivisorShort) { + IRBuilder<> Builder(I); + Value *trDividend = Builder.CreateTrunc(Dividend, BypassType); + Value *trDivisor = Builder.CreateTrunc(Divisor, BypassType); + Value *trResult = UseDivOp ? Builder.CreateUDiv(trDividend, trDivisor) + : Builder.CreateURem(trDividend, trDivisor); + Value *Result = Builder.CreateZExt(trResult, I->getType()); + I->replaceAllUsesWith(Result); + I->eraseFromParent(); + return true; + } // Basic Block is split before divide - BasicBlock *MainBB = &*I->getParent(); + BasicBlock *MainBB = I->getParent(); BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I); - - // Add new basic block for slow divide operation - BasicBlock *SlowBB = - BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); - SlowBB->moveBefore(SuccessorBB); - IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); - Value *SlowQuotientV; - Value *SlowRemainderV; - if (UseSignedOp) { - SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); - SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); + Value *ZeroV = ConstantInt::get(I->getType(), 0); + + // If the division is unsigned and Dividend is known to be short then either + // 1) Divisor is less or equal to Dividend and the result can be computed + // with a short division. + // 2) Divisor is greater than Dividend. Then the (long) division is not + // needed at all. In this case the quotient is 0 and the remainder is + // equal to Dividend. + bool ShortCut = DividendShort && !UseSignedOp; + + // New basic block for slow divide operation + BasicBlock *SlowBB = nullptr; + Value *SlowQuotientV = nullptr; + Value *SlowRemainderV = nullptr; + + if (ShortCut) { + SlowQuotientV = ZeroV; + SlowRemainderV = Dividend; } else { - SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); - SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); + SlowBB = BasicBlock::Create(F->getContext(), "", MainBB->getParent(), + SuccessorBB); + IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); + if (UseSignedOp) { + SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); + } else { + SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); + } + SlowBuilder.CreateBr(SuccessorBB); } - SlowBuilder.CreateBr(SuccessorBB); // Add new basic block for fast divide operation BasicBlock *FastBB = - BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); - FastBB->moveBefore(SlowBB); + BasicBlock::Create(F->getContext(), "", MainBB->getParent(), + ShortCut ? SuccessorBB : SlowBB); IRBuilder<> FastBuilder(FastBB, FastBB->begin()); Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, BypassType); @@ -139,10 +203,10 @@ // Phi nodes for result of div and rem IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); - QuoPhi->addIncoming(SlowQuotientV, SlowBB); + QuoPhi->addIncoming(SlowQuotientV, ShortCut ? MainBB : SlowBB); QuoPhi->addIncoming(FastQuotientV, FastBB); PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); - RemPhi->addIncoming(SlowRemainderV, SlowBB); + RemPhi->addIncoming(SlowRemainderV, ShortCut ? MainBB : SlowBB); RemPhi->addIncoming(FastRemainderV, FastBB); // Replace I with appropriate phi node @@ -152,7 +216,6 @@ I->replaceAllUsesWith(RemPhi); I->eraseFromParent(); - // Combine operands into a single value with OR for value testing below MainBB->getInstList().back().eraseFromParent(); IRBuilder<> MainBuilder(MainBB, MainBB->end()); @@ -161,21 +224,30 @@ // OR'ed together. assert(!isa(Divisor)); - Value *OrV; - if (!isa(Dividend)) - OrV = MainBuilder.CreateOr(Dividend, Divisor); - else - OrV = Divisor; - - // BitMask is inverted to check if the operands are - // larger than the bypass type - uint64_t BitMask = ~BypassType->getBitMask(); - Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); + // If CmpV is true then do the short division. + Value *CmpV = nullptr; + if (ShortCut) { + CmpV = MainBuilder.CreateICmpUGE(Dividend, Divisor); + } else { + // Combine operands into a single value with OR for value testing below + Value *OrV = nullptr; + if (!DividendShort && !DivisorShort) + OrV = MainBuilder.CreateOr(Dividend, Divisor); + else if (!DividendShort) + OrV = Dividend; + else + OrV = Divisor; + + // BitMask is inverted to check if the operands are + // larger than the bypass type + uint64_t BitMask = ~BypassType->getBitMask(); + Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); + + // Compare operand values + CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); + } - // Compare operand values and branch - Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); - Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); - MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); + MainBuilder.CreateCondBr(CmpV, FastBB, ShortCut ? SuccessorBB : SlowBB); // Cache phi nodes to be used later in place of other instances // of div or rem with the same sign, dividend, and divisor Index: test/CodeGen/X86/bypass-slow-division-64.ll =================================================================== --- test/CodeGen/X86/bypass-slow-division-64.ll +++ test/CodeGen/X86/bypass-slow-division-64.ll @@ -52,3 +52,56 @@ %result = add i64 %resultdiv, %resultrem ret i64 %result } + +; No bypassing should be done in apparently unsuitable cases. +define i64 @Test_no_bypassing(i32 %a, i64 %b) nounwind { +; CHECK-LABEL: Test_no_bypassing: +; CHECK-NOT: divl + %a.1 = zext i32 %a to i64 + ; %a.2 is always negative so the division cannot be bypassed. + %a.2 = sub i64 -1, %a.1 + %res = srem i64 %a.2, %b + ret i64 %res +} + +; A simplified run-time check when divisor is known to be short. +define i64 @Test_check_one_operand(i64 %a, i32 %b) nounwind { +; CHECK-LABEL: Test_check_one_operand: +; CHECK-NOT: orq +; CHECK-DAG: divl +; CHECK-DAG: idivq + %b.1 = zext i32 %b to i64 + %res = sdiv i64 %a, %b.1 + ret i64 %res +} + +; A bit of a contrived test to replace a long division with a short one without +; CFG modification. +define i64 @Test_check_none(i64 %a, i32 %b) nounwind { +; CHECK-LABEL: Test_check_none: +; CHECK-NOT: j +; CHECK-NOT: divq +; CHECK: divl +; CHECK-NOT: div + %a.1 = and i64 %a, 4294967295 + %b.1 = zext i32 %b to i64 + %res = udiv i64 %a.1, %b.1 + ret i64 %res +} + +; Special case to completely get rid of an unsigned long division if the +; dividend is known to be short enough. +define i64 @Test_special_case(i32 %a, i64 %b) nounwind { +; CHECK-LABEL: Test_special_case: +; CHECK-NOT: shrq +; CHECK-NOT: orq +; CHECK: cmpq +; CHECK-NOT: divq +; CHECK: divl +; CHECK-NOT: div + %a.1 = zext i32 %a to i64 + %div = udiv i64 %a.1, %b + %rem = urem i64 %a.1, %b + %res = add i64 %div, %rem + ret i64 %res +}