Index: lib/Target/ARM/ARMTargetTransformInfo.h
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.h
+++ lib/Target/ARM/ARMTargetTransformInfo.h
@@ -122,6 +122,8 @@
                                  ArrayRef<unsigned> Indices, unsigned Alignment,
                                  unsigned AddressSpace);
 
+  void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
   bool shouldBuildLookupTablesForConstant(Constant *C) const {
     // In the ROPI and RWPI relocation models we can't have pointers to global
     // variables or functions in constant data, so don't convert switches to
Index: lib/Target/ARM/ARMTargetTransformInfo.cpp
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -544,3 +544,86 @@
   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                            Alignment, AddressSpace);
 }
+
+static Value *getTripCountValue(Loop *L) {
+  // Canonical loops will end with a 'cmp ne I, V', where I is the incremented
+  // canonical induction variable and V is the trip count of the loop.
+  PHINode *IV = L->getCanonicalInductionVariable();
+  if (!IV)
+    return nullptr;
+
+  BasicBlock *BackedgeBlock = L->getLoopLatch();
+  Value *Inc = IV->getIncomingValueForBlock(BackedgeBlock);
+
+  if (auto *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator())) {
+    if (BI->isConditional()) {
+      if (auto *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
+        if (ICI->getOperand(0) == Inc) {
+          return ICI->getOperand(1);
+        }
+      }
+    }
+  }
+  return nullptr;
+}
+
+void ARMTTIImpl::getUnrollingPreferences(Loop *L,
+                                         TTI::UnrollingPreferences &UP) {
+  if (!ST->isThumb2() || !ST->isMClass() || L->getNumBlocks() != 1 ||
+      !L->getExitingBlock() || !L->getUniqueExitBlock())
+    return;
+
+  BasicBlock *BB = L->getLoopLatch();
+  UP.OptSizeThreshold = 0;
+  UP.PartialOptSizeThreshold = 0;
+
+  unsigned MemOps = 1;
+  // Scan the loop: don't unroll loops with calls.
+  for (auto &I : *BB) {
+    if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+      ImmutableCallSite CS(&I);
+      if (const Function *F = CS.getCalledFunction()) {
+        if (!isLoweredToCall(F))
+          continue;
+      }
+      return;
+    } else if (isa<LoadInst>(I) || isa<StoreInst>(I))
+      ++MemOps;
+  }
+
+  UP.Partial = true;
+  UP.Threshold = 150;
+  UP.PartialThreshold = 150;
+
+  // Increase the thresholds for multiple-issue machines on more
+  // compute-bound loops.
+  if (ST->getInstrItineraryData()->SchedModel.IssueWidth > 1) {
+    if (BB->size() / MemOps > 3) {
+      UP.PartialThreshold = 300;
+      UP.Threshold = 300;
+    }
+  }
+
+  // Enable runtime unrolling for non-nested loops.
+  if (L->getLoopDepth() == 1) {
+    UP.Runtime = true;
+    return;
+  }
+
+  // For nested loops, we want the trip count to be invariant in the outer
+  // loops.
+  Value *TripCount = getTripCountValue(L);
+  if (!TripCount)
+    return;
+
+  Loop *Parent = L->getParentLoop();
+  while (Parent) {
+    if (!Parent->isLoopInvariant(TripCount))
+      return;
+    Parent = Parent->getParentLoop();
+  }
+
+  // Lower the threshold for nested runtime loops.
+  UP.PartialThreshold /= 2;
+  UP.Runtime = true;
+}
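
Note (illustrative, not part of the patch): getTripCountValue() matches a latch that ends in an 'icmp ne' between the incremented canonical induction variable and some value V, feeding the conditional back-branch, and treats V as the trip count. At the source level, the loops these preferences target look like the hypothetical kernel below: a single-block inner loop with no calls whose trip count is invariant in every enclosing loop, so runtime unrolling kicks in with the halved PartialThreshold. The function name and parameters here are invented for illustration.

// Hypothetical example, not from the patch: the shape of loop nest that
// passes the checks in ARMTTIImpl::getUnrollingPreferences() above.
void scaleRows(float *Dst, const float *Src, float A, int Rows, int N) {
  for (int R = 0; R < Rows; ++R)
    // Inner loop: a single basic block containing no calls, and its trip
    // count 'N' is invariant in the outer loop, so it qualifies for
    // runtime unrolling on Thumb2 M-class cores.
    for (int I = 0; I < N; ++I)
      Dst[R * N + I] = A * Src[R * N + I];
}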