Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -175,6 +175,9 @@ /// minimun can be computed outside of loop, and X is not a loop-invariant. static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU); +static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT); static Instruction *cloneInstructionInExitBlock( Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU); @@ -998,6 +1001,13 @@ continue; } + // Try to hoist GEPs by reassociation. + if (hoistGEP(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) { + ++NumHoisted; + Changed = true; + continue; + } + // Remember possibly hoistable branches so we can actually hoist them // later if needed. if (BranchInst *BI = dyn_cast(&I)) @@ -2487,6 +2497,53 @@ return true; } +/// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if +/// this allows hoisting the inner GEP. +static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT) { + auto *GEP = dyn_cast(&I); + if (!GEP) + return false; + + auto *Src = dyn_cast(GEP->getPointerOperand()); + if (!Src || !Src->hasOneUse() || !L.contains(Src)) + return false; + + Value *SrcPtr = Src->getPointerOperand(); + auto LoopInvariant = [&](Value *V) { return L.isLoopInvariant(V); }; + if (!L.isLoopInvariant(SrcPtr) || !all_of(GEP->indices(), LoopInvariant)) + return false; + + assert(!all_of(Src->indices(), LoopInvariant) && + "Would have been hoisted already"); + + // The swapped GEPs are inbounds if both original GEPs are inbounds + // and the sign of the offsets is the same. For simplicity, only + // handle both offsets being non-negative. + const DataLayout &DL = GEP->getModule()->getDataLayout(); + auto NonNegative = [&](Value *V) { + return isKnownNonNegative(V, DL, 0, AC, GEP, DT); + }; + bool IsInBounds = Src->isInBounds() && GEP->isInBounds() && + all_of(Src->indices(), NonNegative) && + all_of(GEP->indices(), NonNegative); + + BasicBlock *Preheader = L.getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); + Value *NewSrc = + Builder.CreateGEP(GEP->getSourceElementType(), SrcPtr, + SmallVector(GEP->indices()), "", IsInBounds); + Builder.SetInsertPoint(GEP); + Value *NewGEP = + Builder.CreateGEP(Src->getSourceElementType(), NewSrc, + SmallVector(Src->indices()), "", IsInBounds); + GEP->replaceAllUsesWith(NewGEP); + eraseInstruction(*GEP, SafetyInfo, MSSAU); + eraseInstruction(*Src, SafetyInfo, MSSAU); + return true; +} + /// Little predicate that returns true if the specified basic block is in /// a subloop of the current one, not the current one itself. /// Index: llvm/test/Transforms/LICM/gep-reassociate.ll =================================================================== --- llvm/test/Transforms/LICM/gep-reassociate.ll +++ llvm/test/Transforms/LICM/gep-reassociate.ll @@ -11,13 +11,13 @@ ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i32 [[ARG:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARG_EXT:%.*]] = zext i32 [[ARG]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[ARG_EXT]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() ; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i32 [[VAL]] to i64 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[VAL_EXT]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, ptr [[PTR2]], i64 [[ARG_EXT]] -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[VAL_EXT]] +; CHECK-NEXT: call void @use(ptr [[TMP1]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -42,13 +42,13 @@ ; CHECK-LABEL: define void @both_inbounds_one_neg ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR]], i64 -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() ; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i32 [[VAL]] to i64 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[VAL_EXT]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, ptr [[PTR2]], i64 -1 -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[VAL_EXT]] +; CHECK-NEXT: call void @use(ptr [[TMP1]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -72,13 +72,13 @@ ; CHECK-LABEL: define void @both_inbounds_pos ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() ; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i32 [[VAL]] to i64 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[VAL_EXT]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR2]], i64 1 -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 [[VAL_EXT]] +; CHECK-NEXT: call void @use(ptr [[TMP1]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -102,12 +102,12 @@ ; CHECK-LABEL: define void @different_elem_types ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[PTR]], i64 [[ARG]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i64 @get.i64() -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[VAL]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i64, ptr [[PTR2]], i64 [[ARG]] -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[VAL]] +; CHECK-NEXT: call void @use(ptr [[TMP1]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -246,13 +246,13 @@ ; CHECK-LABEL: define void @multiple_indices ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG1:%.*]], i64 [[ARG2:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG1]], i64 [[ARG2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL1:%.*]] = call i64 @get.i64() ; CHECK-NEXT: [[VAL2:%.*]] = call i64 @get.i64() -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[VAL1]], i64 [[VAL2]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr [0 x i8], ptr [[PTR2]], i64 [[ARG1]], i64 [[ARG2]] -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [0 x i8], ptr [[TMP0]], i64 [[VAL1]], i64 [[VAL2]] +; CHECK-NEXT: call void @use(ptr [[TMP1]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -308,12 +308,12 @@ ; CHECK-LABEL: define void @multiple_indices_very_invariant ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG1:%.*]], i64 [[ARG2:%.*]], i64 [[ARG3:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG1]], i64 [[ARG2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL1:%.*]] = call i64 @get.i64() -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG3]], i64 [[VAL1]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr [0 x i8], ptr [[PTR2]], i64 [[ARG1]], i64 [[ARG2]] -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [0 x i8], ptr [[TMP0]], i64 [[ARG3]], i64 [[VAL1]] +; CHECK-NEXT: call void @use(ptr [[TMP1]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void