Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -237,7 +237,7 @@ /// loop and loop safety information as arguments. It returns changed status. bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, - LICMSafetyInfo *); + LICMSafetyInfo *, DenseMap &); /// \brief Try to promote memory values to scalars by sinking stores out of /// the loop and moving loads to before the loop. We do this by looping over Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -37,6 +37,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -98,6 +99,14 @@ Loop *CurLoop, AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo); +/// \brief Check if operands of two GEP can be swapped and then one of them +// can be hoisted. +static bool +canSwapOperandAndHoistGEPs(Instruction *First, Loop *CurLoop, + DenseMap &NumOfGepBaseUses); +static void swapGEPOperands(Instruction *First, Instruction *Second, + const TargetLibraryInfo *TLI); + namespace { struct LICM : public LoopPass { static char ID; // Pass identification, replacement for typeid @@ -144,6 +153,8 @@ Loop *CurLoop; // The current loop we are working on... AliasSetTracker *CurAST; // AliasSet information for the current loop... DenseMap LoopToAliasSetMap; + // Number of times a GEP base used inside a loop. + DenseMap NumOfGepBaseUses; /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, @@ -212,6 +223,7 @@ // Get the preheader block to move instructions into... Preheader = L->getLoopPreheader(); + NumOfGepBaseUses.clear(); // Loop over the body of this loop, looking for calls, invokes, and stores. // Because subloops have already been incorporated into AST, we skip blocks in // subloops. @@ -221,6 +233,11 @@ BasicBlock *BB = *I; if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. CurAST->add(*BB); // Incorporate the specified basic block + + // Record number of times the base of a GEP is used inside a loop. + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) + if (GetElementPtrInst *GEP = dyn_cast(II)) + NumOfGepBaseUses[GEP->getOperand(0)]++; } // Compute loop safety information. @@ -242,7 +259,7 @@ CurAST, &SafetyInfo); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, - CurLoop, CurAST, &SafetyInfo); + CurLoop, CurAST, &SafetyInfo, NumOfGepBaseUses); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -354,11 +371,12 @@ /// bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { + AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo, + DenseMap &NumOfGepBaseUses) { // Verify inputs. - assert(N != nullptr && AA != nullptr && LI != nullptr && - DT != nullptr && CurLoop != nullptr && CurAST != nullptr && - SafetyInfo != nullptr && "Unexpected input to hoistRegion"); + assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && + CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && + "Unexpected input to hoistRegion"); // Set changed as false. bool Changed = false; // Get basic block @@ -370,6 +388,8 @@ if (!inSubLoop(BB, CurLoop, LI)) for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) { Instruction &I = *II++; + Instruction *INext = II; + // Try constant folding this instruction. If all the operands are // constants, it is technically hoistable, but it would be better to just // fold it. @@ -392,12 +412,17 @@ isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo, CurLoop->getLoopPreheader()->getTerminator())) Changed |= hoist(I, CurLoop->getLoopPreheader()); + + if (canSwapOperandAndHoistGEPs(&I, CurLoop, NumOfGepBaseUses)) { + swapGEPOperands(&I, INext, TLI); + hoist(I, CurLoop->getLoopPreheader()); + } } const std::vector &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) - Changed |= - hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + Changed |= hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, + SafetyInfo, NumOfGepBaseUses); return Changed; } @@ -1023,3 +1048,102 @@ return LI->getLoopFor(BB) != CurLoop; } +static bool +canSwapOperandAndHoistGEPs(Instruction *First, Loop *CurLoop, + DenseMap &NumOfGepBaseUses) { + if (!First || First->getNumUses() != 1) + return false; + + Instruction *Second = cast(*First->user_begin()); + if (!Second || First->getParent() != Second->getParent()) + return false; + + GetElementPtrInst *FirstGEP = dyn_cast(First); + GetElementPtrInst *SecondGEP = dyn_cast(Second); + if (!FirstGEP || !SecondGEP) + return false; + + unsigned FirstNum = FirstGEP->getNumOperands(); + unsigned SecondNum = SecondGEP->getNumOperands(); + // Give up if the number of operands are not 2. + if (FirstNum != SecondNum || SecondNum != 2) + return false; + + Value *FirstBase = FirstGEP->getOperand(0); + Value *SecondBase = SecondGEP->getOperand(0); + Value *FirstOffset = FirstGEP->getOperand(1); + Value *SecondOffset = SecondGEP->getOperand(1); + // Give up if the first base is not loop invariant or used more than once. + if (!CurLoop->isLoopInvariant(FirstBase) || NumOfGepBaseUses[FirstBase] != 1) + return false; + + // Give up if the second operand of the first GEP is loop invariant. + if (CurLoop->isLoopInvariant(FirstOffset)) + return false; + + // Give up if the second operand of second GEP is not constant. + if (!dyn_cast(SecondOffset)) + return false; + + // Give up if base doesn't have same type. + if (FirstBase->getType() != SecondBase->getType()) + return false; + + // Give up if second is not the only user of First. + if (!First->hasOneUse() || + dyn_cast(*First->user_begin()) != Second || + dyn_cast(SecondBase) != First) + return false; + + Instruction *FirstOffsetDef = dyn_cast(FirstOffset); + + // Check if the second operand of first GEP has constant coefficient. + // For an example, for the following code, we won't gain anything by + // hoisting the second GEP out because the second GEP can be folded away. + // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256 + // %67 = shl i64 %scevgep.sum.ur159, 2 + // %uglygep160 = getelementptr i8* %65, i64 %67 + // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024 + + // Skip constant shift instruction which may be generated by Splitting GEPs. + if (FirstOffsetDef && FirstOffsetDef->isShift() && + dyn_cast(FirstOffsetDef->getOperand(1))) + FirstOffsetDef = dyn_cast(FirstOffsetDef->getOperand(0)); + + // Give up if FirstOffsetDef is an Add or Sub with constant. + // Because it may not profitable at all due to constant folding. + if (FirstOffsetDef) + if (BinaryOperator *BO = dyn_cast(FirstOffsetDef)) { + unsigned opc = BO->getOpcode(); + if ((opc == Instruction::Add || opc == Instruction::Sub) && + (dyn_cast(BO->getOperand(0)) || + dyn_cast(BO->getOperand(1)))) + return false; + } + return true; +} + +static void swapGEPOperands(Instruction *First, Instruction *Second, + const TargetLibraryInfo *TLI) { + Value *Offset1 = First->getOperand(1); + Value *Offset2 = Second->getOperand(1); + First->setOperand(1, Offset2); + Second->setOperand(1, Offset1); + + // We changed p+o+c to p+c+o, p+c may not be inbound anymore. + // However the inbound attribute of the p+c+o should be the same. + GetElementPtrInst *FirstGEP = dyn_cast(First); + const DataLayout &DAL = First->getModule()->getDataLayout(); + APInt Offset(DAL.getPointerSizeInBits( + cast(First->getType())->getAddressSpace()), + 0); + Value *NewBase = + First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset); + uint64_t ObjectSize; + if (!getObjectSize(NewBase, ObjectSize, DAL, TLI)) + FirstGEP->setIsInBounds(false); + else if (Offset.ule(ObjectSize)) + FirstGEP->setIsInBounds(true); + else + FirstGEP->setIsInBounds(false); +} Index: test/Transforms/LICM/hoist-gep.ll =================================================================== --- /dev/null +++ test/Transforms/LICM/hoist-gep.ll @@ -0,0 +1,49 @@ +; RUN: opt < %s -licm -S | FileCheck %s +target triple = "aarch64--linux-android" + +%typeD = type { i32, i32, [256 x i32], [257 x i32] } + +; Function Attrs: noreturn nounwind uwtable +define i32 @test1(%typeD* nocapture %s) { +entry: +; CHECK-LABEL: entry: +; CHECK: %uglygep = getelementptr i8, i8* %0, i64 1032 +; CHECK-NEXT: br label %do.body.i + %tPos = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 0 + %k0 = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 1 + %.pre = load i32, i32* %tPos, align 4 + br label %do.body.i + +do.body.i: +; CHECK-LABEL: do.body.i: +; CHECK: %uglygep7 = getelementptr i8, i8* %uglygep, i64 %3 +; CHECK-NOT: %uglygep7 = getelementptr i8, i8* %uglygep, i64 1032 +; CHECK: br i1 %cmp1.i, label %X__indexIntoF.exit, label %do.body.i.backedge + %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ] + %1 = phi i32 [ 0, %entry ], [ %.be6, %do.body.i.backedge ] + %add.i = add nsw i32 %1, %0 + %shr.i = ashr i32 %add.i, 1 + %idxprom.i = sext i32 %shr.i to i64 + %2 = bitcast %typeD* %s to i8* + %3 = shl i64 %idxprom.i, 2 + %uglygep = getelementptr i8, i8* %2, i64 %3 + %uglygep7 = getelementptr i8, i8* %uglygep, i64 1032 + %4 = bitcast i8* %uglygep7 to i32* + %5 = load i32, i32* %4, align 4 + %cmp.i = icmp sle i32 %5, %.pre + %na.1.i = select i1 %cmp.i, i32 %0, i32 %shr.i + %nb.1.i = select i1 %cmp.i, i32 %shr.i, i32 %1 + %sub.i = sub nsw i32 %na.1.i, %nb.1.i + %cmp1.i = icmp eq i32 %sub.i, 1 + br i1 %cmp1.i, label %X__indexIntoF.exit, label %do.body.i.backedge + +do.body.i.backedge: ; preds = %do.body.i, %X__indexIntoF.exit + %.be = phi i32 [ %na.1.i, %do.body.i ], [ 256, %X__indexIntoF.exit ] + %.be6 = phi i32 [ %nb.1.i, %do.body.i ], [ 0, %X__indexIntoF.exit ] + br label %do.body.i + +X__indexIntoF.exit: ; preds = %do.body.i + store i32 %nb.1.i, i32* %k0, align 4 + br label %do.body.i.backedge +} +