Index: lib/Transforms/Utils/BypassSlowDivision.cpp =================================================================== --- lib/Transforms/Utils/BypassSlowDivision.cpp +++ lib/Transforms/Utils/BypassSlowDivision.cpp @@ -36,12 +36,28 @@ : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} }; - struct DivPhiNodes { - PHINode *Quotient; - PHINode *Remainder; + /// A resulting pair of div/rem operations. Typically, they are PHINodes in + /// the successor basic block. + struct DivRemResult { + Value *Quotient; + Value *Remainder; - DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) - : Quotient(InQuotient), Remainder(InRemainder) {} + DivRemResult() : Quotient(nullptr), Remainder(nullptr) {} + DivRemResult(Value *InQuotient, Value *InRemainder) + : Quotient(InQuotient), Remainder(InRemainder) {} + + bool empty() { return !Quotient; } + }; + + /// This structure is used to describe incoming Div/Rem pairs. Based on the + /// data, a pair of PHINodes for Div and Rem results are created in the + /// successor basic block. + struct IncomingDivRemPair { + // PHINode predecessor for the following values + BasicBlock *BB = nullptr; + // Div/Rem values + Value *Quotient = nullptr; + Value *Remainder = nullptr; }; } @@ -69,92 +85,180 @@ } }; - typedef DenseMap DivCacheTy; + typedef DenseMap DivCacheTy; + typedef DenseMap BypassWidthsTy; } -// insertFastDiv - Substitutes the div/rem instruction with code that checks the -// value of the operands and uses a shorter-faster div/rem instruction when -// possible and the longer-slower div/rem instruction otherwise. -static bool insertFastDiv(Instruction *I, IntegerType *BypassType, - bool UseDivOp, bool UseSignedOp, - DivCacheTy &PerBBDivCache) { - Function *F = I->getParent()->getParent(); - // Get instruction operands - Value *Dividend = I->getOperand(0); - Value *Divisor = I->getOperand(1); +namespace { +class FastDivInsertionTask { + bool IsSlowDivision = false; + Instruction *SlowDivOrRem = nullptr; + IntegerType *BypassType = nullptr; + BasicBlock *MainBB = nullptr; + + IncomingDivRemPair createSlowBB(BasicBlock *Successor); + IncomingDivRemPair createFastBB(BasicBlock *Successor); + DivRemResult createDivRemPhiNodes(IncomingDivRemPair &LHS, + IncomingDivRemPair &RHS, BasicBlock *PhiBB); + Value *insertOperandRuntimeCheck(); + DivRemResult insertFastDivAndRem(); + + bool isSignedDiv() { + return SlowDivOrRem->getOpcode() == Instruction::SDiv || + SlowDivOrRem->getOpcode() == Instruction::SRem; + } + bool isDivisionOp() { + return SlowDivOrRem->getOpcode() == Instruction::SDiv || + SlowDivOrRem->getOpcode() == Instruction::UDiv; + } + Type *getSlowType() { return SlowDivOrRem->getType(); } + +public: + FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths); + Value *getReplacement(DivCacheTy &Cache); +}; +} // anonymous namespace + +FastDivInsertionTask::FastDivInsertionTask(Instruction *I, + const BypassWidthsTy &BypassWidths) { + switch (I->getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + SlowDivOrRem = I; + break; + default: + // I is not a div/rem operation. + return; + } + + // Skip division on vector types. Only optimize integer instructions. + IntegerType *SlowType = dyn_cast(SlowDivOrRem->getType()); + if (!SlowType) + return; - if (isa(Divisor)) { - // Division by a constant should have been been solved and replaced earlier - // in the pipeline. - return false; + // Skip if this bitwidth is not bypassed. + auto BI = BypassWidths.find(SlowType->getBitWidth()); + if (BI == BypassWidths.end()) + return; + + // Get type for div/rem instruction with bypass bitwidth. + IntegerType *BT = IntegerType::get(I->getContext(), BI->second); + BypassType = BT; + + // The original basic block. + MainBB = I->getParent(); + + // The instruction is indeed a slow division operation. + IsSlowDivision = true; +} + +/// Reuses previously-computed dividend or remainder from the current BB if +/// operands and operation are identical. Otherwise calls insertFastDivAndRem to +/// perform the optimization and caches the resulting dividend and remainder. +/// If no replacement can be generated, nullptr is returned. +Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) { + // First, make sure that the task is valid. + if (!IsSlowDivision) + return nullptr; + + // Then, look for a value in Cache. + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + DivOpInfo Key(isSignedDiv(), Dividend, Divisor); + auto CacheI = Cache.find(Key); + + if (CacheI == Cache.end()) { + // If previous instance does not exist, try to insert fast div. + DivRemResult DivRem = insertFastDivAndRem(); + // Bail out if insertFastDivAndRem has failed. + if (DivRem.empty()) + return nullptr; + + // Store the result in the cache. + std::pair KeyVal(Key, DivRem); + CacheI = Cache.insert(KeyVal).first; } - // If the numerator is a constant, bail if it doesn't fit into BypassType. - if (ConstantInt *ConstDividend = dyn_cast(Dividend)) - if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth()) - return false; - - // Basic Block is split before divide - BasicBlock *MainBB = &*I->getParent(); - BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I); - - // Add new basic block for slow divide operation - BasicBlock *SlowBB = - BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); - SlowBB->moveBefore(SuccessorBB); - IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); - Value *SlowQuotientV; - Value *SlowRemainderV; - if (UseSignedOp) { - SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); - SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); + DivRemResult &Value = CacheI->second; + if (isDivisionOp()) + return Value.Quotient; + else + return Value.Remainder; +} + +/// Add new basic block for slow div and rem operations and put it before +/// SuccessorBB. +IncomingDivRemPair FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) { + IncomingDivRemPair DivRemPair; + DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", + MainBB->getParent(), SuccessorBB); + IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); + + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + + if (isSignedDiv()) { + DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor); + DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor); } else { - SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); - SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); + DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor); + DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor); } - SlowBuilder.CreateBr(SuccessorBB); - - // Add new basic block for fast divide operation - BasicBlock *FastBB = - BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); - FastBB->moveBefore(SlowBB); - IRBuilder<> FastBuilder(FastBB, FastBB->begin()); - Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, - BypassType); - Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, - BypassType); - - // udiv/urem because optimization only handles positive numbers - Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV); - Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, - ShortDivisorV); - Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, - ShortQuotientV, - Dividend->getType()); - Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, - ShortRemainderV, - Dividend->getType()); - FastBuilder.CreateBr(SuccessorBB); - - // Phi nodes for result of div and rem - IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); - PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); - QuoPhi->addIncoming(SlowQuotientV, SlowBB); - QuoPhi->addIncoming(FastQuotientV, FastBB); - PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); - RemPhi->addIncoming(SlowRemainderV, SlowBB); - RemPhi->addIncoming(FastRemainderV, FastBB); - - // Replace I with appropriate phi node - if (UseDivOp) - I->replaceAllUsesWith(QuoPhi); - else - I->replaceAllUsesWith(RemPhi); - I->eraseFromParent(); - // Combine operands into a single value with OR for value testing below - MainBB->getInstList().back().eraseFromParent(); - IRBuilder<> MainBuilder(MainBB, MainBB->end()); + Builder.CreateBr(SuccessorBB); + return DivRemPair; +} + +/// Add new basic block for fast div and rem operations and put it before +/// SuccessorBB. +IncomingDivRemPair FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) { + IncomingDivRemPair DivRemPair; + DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", + MainBB->getParent(), SuccessorBB); + IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); + + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + Value *ShortDivisorV = + Builder.CreateCast(Instruction::Trunc, Divisor, BypassType); + Value *ShortDividendV = + Builder.CreateCast(Instruction::Trunc, Dividend, BypassType); + + // udiv/urem because this optimization only handles positive numbers. + Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV); + Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV); + DivRemPair.Quotient = + Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType()); + DivRemPair.Remainder = + Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType()); + Builder.CreateBr(SuccessorBB); + + return DivRemPair; +} + +/// Creates Phi nodes for result of Div and Rem. +DivRemResult FastDivInsertionTask::createDivRemPhiNodes(IncomingDivRemPair &LHS, + IncomingDivRemPair &RHS, + BasicBlock *PhiBB) { + IRBuilder<> Builder(PhiBB, PhiBB->begin()); + PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2); + QuoPhi->addIncoming(LHS.Quotient, LHS.BB); + QuoPhi->addIncoming(RHS.Quotient, RHS.BB); + PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2); + RemPhi->addIncoming(LHS.Remainder, LHS.BB); + RemPhi->addIncoming(RHS.Remainder, RHS.BB); + return DivRemResult(QuoPhi, RemPhi); +} + +/// Creates a runtime check to test whether both the divisor and dividend fit +/// into BypassType. The check is inserted at the end of MainBB. True return +/// value means that the operands fit. +Value *FastDivInsertionTask::insertOperandRuntimeCheck() { + IRBuilder<> Builder(MainBB, MainBB->end()); + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); // We should have bailed out above if the divisor is a constant, but the // dividend may still be a constant. Set OrV to our non-constant operands @@ -163,65 +267,54 @@ Value *OrV; if (!isa(Dividend)) - OrV = MainBuilder.CreateOr(Dividend, Divisor); + OrV = Builder.CreateOr(Dividend, Divisor); else OrV = Divisor; // BitMask is inverted to check if the operands are // larger than the bypass type uint64_t BitMask = ~BypassType->getBitMask(); - Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); - - // Compare operand values and branch - Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); - Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); - MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); - - // Cache phi nodes to be used later in place of other instances - // of div or rem with the same sign, dividend, and divisor - DivOpInfo Key(UseSignedOp, Dividend, Divisor); - DivPhiNodes Value(QuoPhi, RemPhi); - PerBBDivCache.insert(std::pair(Key, Value)); - return true; + Value *AndV = Builder.CreateAnd(OrV, BitMask); + + // Compare operand values + Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0); + return Builder.CreateICmpEQ(AndV, ZeroV); } -// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from -// the current BB if operands and operation are identical. Otherwise calls -// insertFastDiv to perform the optimization and caches the resulting dividend -// and remainder. -static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType, - bool UseDivOp, bool UseSignedOp, - DivCacheTy &PerBBDivCache) { - // Get instruction operands - DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1)); - DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); - - if (CacheI == PerBBDivCache.end()) { - // If previous instance does not exist, insert fast div - return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache); - } +/// Substitutes the div/rem instruction with code that checks the value of the +/// operands and uses a shorter-faster div/rem instruction when possible. +DivRemResult FastDivInsertionTask::insertFastDivAndRem() { + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); - // Replace operation value with previously generated phi node - DivPhiNodes &Value = CacheI->second; - if (UseDivOp) { - // Replace all uses of div instruction with quotient phi node - I->replaceAllUsesWith(Value.Quotient); - } else { - // Replace all uses of rem instruction with remainder phi node - I->replaceAllUsesWith(Value.Remainder); + if (isa(Divisor)) { + // Keep division by a constant for DAGCombiner. + return DivRemResult(); } - // Remove redundant operation - I->eraseFromParent(); - return true; + // If the numerator is a constant, bail if it doesn't fit into BypassType. + if (ConstantInt *ConstDividend = dyn_cast(Dividend)) + if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth()) + return DivRemResult(); + + // Split the basic block before the div/rem. + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); + // Remove the unconditional branch from MainBB to SuccessorBB. + MainBB->getInstList().back().eraseFromParent(); + IncomingDivRemPair Fast = createFastBB(SuccessorBB); + IncomingDivRemPair Slow = createSlowBB(SuccessorBB); + DivRemResult DivRem = createDivRemPhiNodes(Fast, Slow, SuccessorBB); + Value *CmpV = insertOperandRuntimeCheck(); + IRBuilder<> Builder(MainBB, MainBB->end()); + Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB); + return DivRem; } -// bypassSlowDivision - This optimization identifies DIV instructions in a BB -// that can be profitably bypassed and carried out with a shorter, faster -// divide. -bool llvm::bypassSlowDivision( - BasicBlock *BB, const DenseMap &BypassWidths) { - DivCacheTy DivCache; +/// This optimization identifies DIV/REM instructions in a BB that can be +/// profitably bypassed and carried out with a shorter, faster divide. +bool llvm::bypassSlowDivision(BasicBlock *BB, + const BypassWidthsTy &BypassWidths) { + DivCacheTy PerBBDivCache; bool MadeChange = false; Instruction* Next = &*BB->begin(); @@ -231,42 +324,20 @@ Instruction* I = Next; Next = Next->getNextNode(); - // Get instruction details - unsigned Opcode = I->getOpcode(); - bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; - bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; - bool UseSignedOp = Opcode == Instruction::SDiv || - Opcode == Instruction::SRem; - - // Only optimize div or rem ops - if (!UseDivOp && !UseRemOp) - continue; - - // Skip division on vector types, only optimize integer instructions - if (!I->getType()->isIntegerTy()) - continue; - - // Get bitwidth of div/rem instruction - IntegerType *T = cast(I->getType()); - unsigned int bitwidth = T->getBitWidth(); - - // Continue if bitwidth is not bypassed - DenseMap::const_iterator BI = BypassWidths.find(bitwidth); - if (BI == BypassWidths.end()) - continue; - - // Get type for div/rem instruction with bypass bitwidth - IntegerType *BT = IntegerType::get(I->getContext(), BI->second); - - MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache); + FastDivInsertionTask Task(I, BypassWidths); + if (Value *Replacement = Task.getReplacement(PerBBDivCache)) { + I->replaceAllUsesWith(Replacement); + I->eraseFromParent(); + MadeChange = true; + } } // Above we eagerly create divs and rems, as pairs, so that we can efficiently // create divrem machine instructions. Now erase any unused divs / rems so we // don't leave extra instructions sitting around. - for (auto &KV : DivCache) - for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder}) - RecursivelyDeleteTriviallyDeadInstructions(Phi); + for (auto &KV : PerBBDivCache) + for (Value *V : {KV.second.Quotient, KV.second.Remainder}) + RecursivelyDeleteTriviallyDeadInstructions(V); return MadeChange; }