Index: lib/Transforms/Utils/BypassSlowDivision.cpp =================================================================== --- lib/Transforms/Utils/BypassSlowDivision.cpp +++ lib/Transforms/Utils/BypassSlowDivision.cpp @@ -70,91 +70,174 @@ }; typedef DenseMap DivCacheTy; + typedef DenseMap BypassWidthsTy; } -// insertFastDiv - Substitutes the div/rem instruction with code that checks the -// value of the operands and uses a shorter-faster div/rem instruction when -// possible and the longer-slower div/rem instruction otherwise. -static bool insertFastDiv(Instruction *I, IntegerType *BypassType, - bool UseDivOp, bool UseSignedOp, - DivCacheTy &PerBBDivCache) { - Function *F = I->getParent()->getParent(); - // Get instruction operands - Value *Dividend = I->getOperand(0); - Value *Divisor = I->getOperand(1); +namespace { +class FastDivInsertionTask { + // These fields are set during initialization. + unsigned Opcode; + Instruction *SlowDiv; + IntegerType *SlowType; + IntegerType *BypassType; + BasicBlock *MainBB; + Value *Dividend; + Value *Divisor; + + // These fields are used to pass intermediate results through private + // methods. + Value *ShortQuotientV; + Value *ShortRemainderV; + Value *LongQuotientV; + Value *LongRemainderV; + + BasicBlock *createSlowBB(BasicBlock *Successor); + BasicBlock *createFastBB(BasicBlock *Successor); + void createDivRemPhiNodes(BasicBlock *ShortBB, BasicBlock *LongBB, + BasicBlock *PhiBB, DivCacheTy &PerBBDivCache); + Value *createDivRunTimeCheck(); + +public: + /// Sets up the object to work with instruction \p I. Returns false if the + /// instruction is not a scalar integer division operation. + bool setScalarIDivOp(Instruction *I); + void setBypassType(IntegerType *BT) { BypassType = BT; } + + Instruction *getSlowDiv() { return SlowDiv; } + IntegerType *getSlowType() { return SlowType; } + Value *getDividend() { return Dividend; } + Value *getDivisor() { return Divisor; } + + bool isSignedDiv() { + return Opcode == Instruction::SDiv || Opcode == Instruction::SRem; + } + bool isDivisionOp() { + return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; + } - if (isa(Divisor)) { - // Division by a constant should have been been solved and replaced earlier - // in the pipeline. + /// Tries to replace a division with a faster code and returns true if + /// succeeds. + bool insertFastDiv(DivCacheTy &PerBBDivCache); +}; + +class FastDivInserter { + const BypassWidthsTy &BypassWidths; + DivCacheTy PerBBDivCache; + +public: + FastDivInserter(const BypassWidthsTy &BW) : BypassWidths(BW) {} + DivCacheTy &getDivCache() { return PerBBDivCache; } + + /// Checks that \p I is indeed a slow division and initializes \p Task + /// properly. In case of success returns true. + bool isSlowDiv(Instruction *I, FastDivInsertionTask &Task); + + /// Tries to replace a slow division with either existing operations or with + /// new ones. Returns true if succeeds. + bool reuseOrInsertFastDiv(FastDivInsertionTask &Task); +}; +} + +bool FastDivInsertionTask::setScalarIDivOp(Instruction *I) { + SlowDiv = I; + Opcode = I->getOpcode(); + BypassType = nullptr; // To make sure BypassType is set before using. + + // Only optimize div or rem ops + switch (Opcode) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + break; + default: return false; } - // If the numerator is a constant, bail if it doesn't fit into BypassType. - if (ConstantInt *ConstDividend = dyn_cast(Dividend)) - if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth()) - return false; + // Skip division on vector types, only optimize integer instructions + if (!I->getType()->isIntegerTy()) + return false; - // Basic Block is split before divide - BasicBlock *MainBB = &*I->getParent(); - BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I); - - // Add new basic block for slow divide operation - BasicBlock *SlowBB = - BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); - SlowBB->moveBefore(SuccessorBB); - IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); - Value *SlowQuotientV; - Value *SlowRemainderV; - if (UseSignedOp) { - SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); - SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); + SlowType = cast(I->getType()); + MainBB = I->getParent(); + Dividend = SlowDiv->getOperand(0); + Divisor = SlowDiv->getOperand(1); + + return true; +} + +/// Add new basic block for slow divide operation and put it before SuccessorBB. +BasicBlock *FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) { + BasicBlock *SlowBB = BasicBlock::Create(MainBB->getParent()->getContext(), "", + MainBB->getParent(), SuccessorBB); + IRBuilder<> Builder(SlowBB, SlowBB->begin()); + + if (isSignedDiv()) { + LongQuotientV = Builder.CreateSDiv(Dividend, Divisor); + LongRemainderV = Builder.CreateSRem(Dividend, Divisor); } else { - SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); - SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); + LongQuotientV = Builder.CreateUDiv(Dividend, Divisor); + LongRemainderV = Builder.CreateURem(Dividend, Divisor); } - SlowBuilder.CreateBr(SuccessorBB); - - // Add new basic block for fast divide operation - BasicBlock *FastBB = - BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); - FastBB->moveBefore(SlowBB); - IRBuilder<> FastBuilder(FastBB, FastBB->begin()); - Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, - BypassType); - Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, - BypassType); + + Builder.CreateBr(SuccessorBB); + return SlowBB; +} + +/// Add new basic block for fast divide operation and put it before SuccessorBB. +BasicBlock *FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) { + BasicBlock *FastBB = BasicBlock::Create(MainBB->getParent()->getContext(), "", + MainBB->getParent(), SuccessorBB); + IRBuilder<> Builder(FastBB, FastBB->begin()); + Value *ShortDivisorV = + Builder.CreateCast(Instruction::Trunc, Divisor, BypassType); + Value *ShortDividendV = + Builder.CreateCast(Instruction::Trunc, Dividend, BypassType); // udiv/urem because optimization only handles positive numbers - Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV); - Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, - ShortDivisorV); - Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, - ShortQuotientV, - Dividend->getType()); - Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, - ShortRemainderV, - Dividend->getType()); - FastBuilder.CreateBr(SuccessorBB); - - // Phi nodes for result of div and rem - IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); - PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); - QuoPhi->addIncoming(SlowQuotientV, SlowBB); - QuoPhi->addIncoming(FastQuotientV, FastBB); - PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); - RemPhi->addIncoming(SlowRemainderV, SlowBB); - RemPhi->addIncoming(FastRemainderV, FastBB); - - // Replace I with appropriate phi node - if (UseDivOp) - I->replaceAllUsesWith(QuoPhi); + Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV); + Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV); + ShortQuotientV = Builder.CreateCast(Instruction::ZExt, ShortQV, SlowType); + ShortRemainderV = Builder.CreateCast(Instruction::ZExt, ShortRV, SlowType); + Builder.CreateBr(SuccessorBB); + + return FastBB; +} + +// Create Phi nodes for result of Div and Rem. +void FastDivInsertionTask::createDivRemPhiNodes(BasicBlock *ShortBB, + BasicBlock *LongBB, + BasicBlock *PhiBB, + DivCacheTy &PerBBDivCache) { + IRBuilder<> Builder(PhiBB, PhiBB->begin()); + PHINode *QuoPhi = Builder.CreatePHI(SlowType, 2); + QuoPhi->addIncoming(LongQuotientV, LongBB); + QuoPhi->addIncoming(ShortQuotientV, ShortBB); + PHINode *RemPhi = Builder.CreatePHI(SlowType, 2); + RemPhi->addIncoming(LongRemainderV, LongBB); + RemPhi->addIncoming(ShortRemainderV, ShortBB); + + // Replace SlowDiv with appropriate phi node + if (isDivisionOp()) + SlowDiv->replaceAllUsesWith(QuoPhi); else - I->replaceAllUsesWith(RemPhi); - I->eraseFromParent(); + SlowDiv->replaceAllUsesWith(RemPhi); - // Combine operands into a single value with OR for value testing below - MainBB->getInstList().back().eraseFromParent(); - IRBuilder<> MainBuilder(MainBB, MainBB->end()); + SlowDiv->eraseFromParent(); + SlowDiv = nullptr; + + // Cache phi nodes to be used later in place of other instances + // of div or rem with the same sign, dividend, and divisor + DivOpInfo Key(isSignedDiv(), Dividend, Divisor); + DivPhiNodes Val(QuoPhi, RemPhi); + PerBBDivCache.insert(std::pair(Key, Val)); +} + +// Creates a run-time check to test whether both operands fit the shorter type. +// The check is inserted at the end of MainBB. +// True return value means that the operands fit. +Value *FastDivInsertionTask::createDivRunTimeCheck() { + IRBuilder<> Builder(MainBB, MainBB->end()); // We should have bailed out above if the divisor is a constant, but the // dividend may still be a constant. Set OrV to our non-constant operands @@ -163,25 +246,63 @@ Value *OrV; if (!isa(Dividend)) - OrV = MainBuilder.CreateOr(Dividend, Divisor); + OrV = Builder.CreateOr(Dividend, Divisor); else OrV = Divisor; // BitMask is inverted to check if the operands are // larger than the bypass type uint64_t BitMask = ~BypassType->getBitMask(); - Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); + Value *AndV = Builder.CreateAnd(OrV, BitMask); - // Compare operand values and branch - Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); - Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); - MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); + // Compare operand values + Value *ZeroV = ConstantInt::getSigned(SlowType, 0); + return Builder.CreateICmpEQ(AndV, ZeroV); +} + +// insertFastDiv - Substitutes the div/rem instruction with code that checks the +// value of the operands and uses a shorter-faster div/rem instruction when +// possible and the longer-slower div/rem instruction otherwise. +bool FastDivInsertionTask::insertFastDiv(DivCacheTy &PerBBDivCache) { + if (isa(Divisor)) { + // Division by a constant should have been been solved and replaced earlier + // in the pipeline. + return false; + } + + // If the numerator is a constant, bail if it doesn't fit into BypassType. + if (ConstantInt *ConstDividend = dyn_cast(Dividend)) + if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth()) + return false; + + // Basic Block is split before divide + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDiv); + MainBB->getInstList().back().eraseFromParent(); + BasicBlock *FastBB = createFastBB(SuccessorBB); + BasicBlock *SlowBB = createSlowBB(SuccessorBB); + createDivRemPhiNodes(FastBB, SlowBB, SuccessorBB, PerBBDivCache); + Value *CmpV = createDivRunTimeCheck(); + IRBuilder<> Builder(MainBB, MainBB->end()); + Builder.CreateCondBr(CmpV, FastBB, SlowBB); + + return true; +} + +bool FastDivInserter::isSlowDiv(Instruction *I, FastDivInsertionTask &Task) { + if (!Task.setScalarIDivOp(I)) + return false; + + unsigned int bitwidth = Task.getSlowType()->getBitWidth(); + + // Skip if bitwidth is not bypassed + BypassWidthsTy::const_iterator BI = BypassWidths.find(bitwidth); + if (BI == BypassWidths.end()) + return false; + + // Get type for div/rem instruction with bypass bitwidth + IntegerType *BT = IntegerType::get(I->getContext(), BI->second); + Task.setBypassType(BT); - // Cache phi nodes to be used later in place of other instances - // of div or rem with the same sign, dividend, and divisor - DivOpInfo Key(UseSignedOp, Dividend, Divisor); - DivPhiNodes Value(QuoPhi, RemPhi); - PerBBDivCache.insert(std::pair(Key, Value)); return true; } @@ -189,39 +310,40 @@ // the current BB if operands and operation are identical. Otherwise calls // insertFastDiv to perform the optimization and caches the resulting dividend // and remainder. -static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType, - bool UseDivOp, bool UseSignedOp, - DivCacheTy &PerBBDivCache) { +bool FastDivInserter::reuseOrInsertFastDiv(FastDivInsertionTask &Task) { + Instruction *SlowDiv = Task.getSlowDiv(); + // Get instruction operands - DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1)); + DivOpInfo Key(Task.isSignedDiv(), Task.getDividend(), Task.getDivisor()); DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); if (CacheI == PerBBDivCache.end()) { // If previous instance does not exist, insert fast div - return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache); + return Task.insertFastDiv(PerBBDivCache); } // Replace operation value with previously generated phi node DivPhiNodes &Value = CacheI->second; - if (UseDivOp) { + if (Task.isDivisionOp()) { // Replace all uses of div instruction with quotient phi node - I->replaceAllUsesWith(Value.Quotient); + SlowDiv->replaceAllUsesWith(Value.Quotient); } else { // Replace all uses of rem instruction with remainder phi node - I->replaceAllUsesWith(Value.Remainder); + SlowDiv->replaceAllUsesWith(Value.Remainder); } // Remove redundant operation - I->eraseFromParent(); + SlowDiv->eraseFromParent(); return true; } // bypassSlowDivision - This optimization identifies DIV instructions in a BB // that can be profitably bypassed and carried out with a shorter, faster // divide. -bool llvm::bypassSlowDivision( - BasicBlock *BB, const DenseMap &BypassWidths) { - DivCacheTy DivCache; +bool llvm::bypassSlowDivision(BasicBlock *BB, + const BypassWidthsTy &BypassWidths) { + FastDivInserter FDI(BypassWidths); + FastDivInsertionTask Task; bool MadeChange = false; Instruction* Next = &*BB->begin(); @@ -231,40 +353,16 @@ Instruction* I = Next; Next = Next->getNextNode(); - // Get instruction details - unsigned Opcode = I->getOpcode(); - bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; - bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; - bool UseSignedOp = Opcode == Instruction::SDiv || - Opcode == Instruction::SRem; - - // Only optimize div or rem ops - if (!UseDivOp && !UseRemOp) - continue; - - // Skip division on vector types, only optimize integer instructions - if (!I->getType()->isIntegerTy()) + if (!FDI.isSlowDiv(I, Task)) continue; - // Get bitwidth of div/rem instruction - IntegerType *T = cast(I->getType()); - unsigned int bitwidth = T->getBitWidth(); - - // Continue if bitwidth is not bypassed - DenseMap::const_iterator BI = BypassWidths.find(bitwidth); - if (BI == BypassWidths.end()) - continue; - - // Get type for div/rem instruction with bypass bitwidth - IntegerType *BT = IntegerType::get(I->getContext(), BI->second); - - MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache); + MadeChange |= FDI.reuseOrInsertFastDiv(Task); } // Above we eagerly create divs and rems, as pairs, so that we can efficiently // create divrem machine instructions. Now erase any unused divs / rems so we // don't leave extra instructions sitting around. - for (auto &KV : DivCache) + for (auto &KV : FDI.getDivCache()) for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder}) RecursivelyDeleteTriviallyDeadInstructions(Phi);