diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -173,7 +173,8 @@ /// Aggregates various functions for hoisting computations out of loop. static bool hoistArithmetics(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, - MemorySSAUpdater &MSSAU); + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT); /// Try to simplify things like (A < INV_1 AND icmp A < INV_2) into (A < /// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their /// minimun can be computed outside of loop, and X is not a loop-invariant. @@ -989,7 +990,7 @@ // Try to reassociate instructions so that part of computations can be // done out of loop. - if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU)) { + if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) { Changed = true; continue; } @@ -2495,9 +2496,57 @@ return true; } +/// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if +/// this allows hoisting the inner GEP. +static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT) { + auto *GEP = dyn_cast(&I); + if (!GEP) + return false; + + auto *Src = dyn_cast(GEP->getPointerOperand()); + if (!Src || !Src->hasOneUse() || !L.contains(Src)) + return false; + + Value *SrcPtr = Src->getPointerOperand(); + auto LoopInvariant = [&](Value *V) { return L.isLoopInvariant(V); }; + if (!L.isLoopInvariant(SrcPtr) || !all_of(GEP->indices(), LoopInvariant)) + return false; + + assert(!all_of(Src->indices(), LoopInvariant) && + "Would have been hoisted already"); + + // The swapped GEPs are inbounds if both original GEPs are inbounds + // and the sign of the offsets is the same. For simplicity, only + // handle both offsets being non-negative. + const DataLayout &DL = GEP->getModule()->getDataLayout(); + auto NonNegative = [&](Value *V) { + return isKnownNonNegative(V, DL, 0, AC, GEP, DT); + }; + bool IsInBounds = Src->isInBounds() && GEP->isInBounds() && + all_of(Src->indices(), NonNegative) && + all_of(GEP->indices(), NonNegative); + + BasicBlock *Preheader = L.getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); + Value *NewSrc = Builder.CreateGEP(GEP->getSourceElementType(), SrcPtr, + SmallVector(GEP->indices()), + "invariant.gep", IsInBounds); + Builder.SetInsertPoint(GEP); + Value *NewGEP = Builder.CreateGEP(Src->getSourceElementType(), NewSrc, + SmallVector(Src->indices()), "gep", + IsInBounds); + GEP->replaceAllUsesWith(NewGEP); + eraseInstruction(*GEP, SafetyInfo, MSSAU); + eraseInstruction(*Src, SafetyInfo, MSSAU); + return true; +} + static bool hoistArithmetics(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, - MemorySSAUpdater &MSSAU) { + MemorySSAUpdater &MSSAU, + AssumptionCache *AC, DominatorTree *DT) { // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them // into (x < min(INV1, INV2)), and hoisting the invariant part of this // expression out of the loop. @@ -2505,6 +2554,13 @@ ++NumMinMaxHoisted; return true; } + + // Try to hoist GEPs by reassociation. + if (hoistGEP(I, L, SafetyInfo, MSSAU, AC, DT)) { + ++NumHoisted; + return true; + } + return false; } diff --git a/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll b/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll --- a/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll +++ b/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll @@ -41,10 +41,9 @@ ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_4: # %if.end9 ; CHECK-NEXT: # -; CHECK-NEXT: add 9, 3, 9 -; CHECK-NEXT: lwz 10, 4(9) +; CHECK-NEXT: lwzx 10, 7, 9 ; CHECK-NEXT: addi 10, 10, 1 -; CHECK-NEXT: stw 10, 4(9) +; CHECK-NEXT: stwx 10, 7, 9 ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_5: # %if.then ; CHECK-NEXT: lwax 3, 9, 3 diff --git a/llvm/test/Transforms/LICM/gep-reassociate.ll b/llvm/test/Transforms/LICM/gep-reassociate.ll --- a/llvm/test/Transforms/LICM/gep-reassociate.ll +++ b/llvm/test/Transforms/LICM/gep-reassociate.ll @@ -11,13 +11,13 @@ ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i32 [[ARG:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARG_EXT:%.*]] = zext i32 [[ARG]] to i64 +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[ARG_EXT]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() ; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i32 [[VAL]] to i64 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[VAL_EXT]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, ptr [[PTR2]], i64 [[ARG_EXT]] -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[VAL_EXT]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -42,13 +42,13 @@ ; CHECK-LABEL: define void @both_inbounds_one_neg ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() ; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i32 [[VAL]] to i64 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[VAL_EXT]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, ptr [[PTR2]], i64 -1 -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[VAL_EXT]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -72,13 +72,13 @@ ; CHECK-LABEL: define void @both_inbounds_pos ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() ; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i32 [[VAL]] to i64 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[VAL_EXT]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR2]], i64 1 -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[INVARIANT_GEP]], i64 [[VAL_EXT]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -102,12 +102,12 @@ ; CHECK-LABEL: define void @different_elem_types ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i64, ptr [[PTR]], i64 [[ARG]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i64 @get.i64() -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[VAL]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i64, ptr [[PTR2]], i64 [[ARG]] -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[VAL]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -126,6 +126,62 @@ ret void } +define void @different_index_types(ptr %ptr, i1 %c, i32 %arg) { +; CHECK-LABEL: define void @different_index_types +; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i32 [[ARG:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[ARG]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[VAL:%.*]] = call i64 @get.i64() +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[VAL]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %val = call i64 @get.i64() + %ptr2 = getelementptr i8, ptr %ptr, i64 %val + %ptr3 = getelementptr i8, ptr %ptr2, i32 %arg + call void @use(ptr %ptr3) + br i1 %c, label %loop, label %exit + +exit: + ret void +} + +define void @different_index_count(ptr %ptr, i1 %c, i64 %arg1, i64 %arg2) { +; CHECK-LABEL: define void @different_index_count +; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG1:%.*]], i64 [[ARG2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG1]], i64 [[ARG2]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[VAL:%.*]] = call i64 @get.i64() +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[VAL]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %val = call i64 @get.i64() + %ptr2 = getelementptr i8, ptr %ptr, i64 %val + %ptr3 = getelementptr [0 x i8], ptr %ptr2, i64 %arg1, i64 %arg2 + call void @use(ptr %ptr3) + br i1 %c, label %loop, label %exit + +exit: + ret void +} + define void @src_has_extra_use(ptr %ptr, i1 %c, i64 %arg) { ; CHECK-LABEL: define void @src_has_extra_use ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG:%.*]]) { @@ -246,13 +302,13 @@ ; CHECK-LABEL: define void @multiple_indices ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG1:%.*]], i64 [[ARG2:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG1]], i64 [[ARG2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL1:%.*]] = call i64 @get.i64() ; CHECK-NEXT: [[VAL2:%.*]] = call i64 @get.i64() -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[VAL1]], i64 [[VAL2]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr [0 x i8], ptr [[PTR2]], i64 [[ARG1]], i64 [[ARG2]] -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [0 x i8], ptr [[INVARIANT_GEP]], i64 [[VAL1]], i64 [[VAL2]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -308,12 +364,12 @@ ; CHECK-LABEL: define void @multiple_indices_very_invariant ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG1:%.*]], i64 [[ARG2:%.*]], i64 [[ARG3:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG1]], i64 [[ARG2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL1:%.*]] = call i64 @get.i64() -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG3]], i64 [[VAL1]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr [0 x i8], ptr [[PTR2]], i64 [[ARG1]], i64 [[ARG2]] -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [0 x i8], ptr [[INVARIANT_GEP]], i64 [[ARG3]], i64 [[VAL1]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void