diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2195,6 +2195,48 @@ } } + if (LI && (GEP.getNumIndices() == 1) && !GEP.getType()->isVectorTy()) { + auto *BB = GEP.getParent(); + auto *L = LI->getLoopFor(BB); + auto *Idx = dyn_cast(GEP.getOperand(1)); + + // Try to reassociate loop invariant index calculations to enable LICM. + if (L && Idx && (Idx->getOpcode() == Instruction::Add)) { + Value *Ptr = GEP.getOperand(0); + Value *InvIdx = Idx->getOperand(0); + Value *NonInvIdx = Idx->getOperand(1); + + if (!L->isLoopInvariant(InvIdx)) + std::swap(InvIdx, NonInvIdx); + + if (L->isLoopInvariant(InvIdx) && !L->isLoopInvariant(NonInvIdx) && + L->isLoopInvariant(Ptr)) { + // Ensure Idx can be eliminated. + auto IsDead = [BB, L](User *U) { + auto *G = dyn_cast(U); + return G && (G->getNumIndices() == 1) && (G->getParent() == BB) && + L->isLoopInvariant(G->getOperand(0)); + }; + + if (Idx->hasOneUse() || + std::all_of(Idx->user_begin(), Idx->user_end(), IsDead)) { + // rewrite: + // %idx = add i64 %invariant, %indvars.iv + // %gep = getelementptr i32, i32* %ptr, i64 %idx + // as: + // %newptr = getelementptr i32, i32* %ptr, i64 %invariant + // %newgep = getelementptr i32, i32* %newptr, i64 %indvars.iv + auto *NewPtr = GetElementPtrInst::Create(GEP.getResultElementType(), + Ptr, InvIdx, "", &GEP); + auto *NewGEP = GetElementPtrInst::Create(GEP.getResultElementType(), + NewPtr, NonInvIdx); + NewGEP->setIsInBounds(GEP.isInBounds()); + return NewGEP; + } + } + } + } + // We do not handle pointer-vector geps here. if (GEPType->isVectorTy()) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll --- a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll +++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll @@ -182,6 +182,88 @@ br label %loop } +; Test that ADD->GEP chains get reassociated to separate invariants and +; thus provide more LICM opportunities. +define void @test1(float* %a, float* %b, i64 %disp) { +; CHECK-LABEL: @test1( +entry: + br label %for.body + +for.body: +; CHECK: for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK: [[INV:%.*]] = getelementptr float, float* %a, i64 %disp +; CHECK: %arrayidx = getelementptr inbounds float, float* [[INV]], i64 %indvars.iv + %idx = add i64 %indvars.iv, %disp + %arrayidx = getelementptr inbounds float, float* %a, i64 %idx + %0 = load float, float* %arrayidx + %div = fdiv float 1.500000e+00, %0 +; CHECK: [[INV1:%.*]] = getelementptr float, float* %b, i64 %disp +; CHECK: %arrayidx1 = getelementptr inbounds float, float* [[INV1]], i64 %indvars.iv + %arrayidx1 = getelementptr inbounds float, float* %b, i64 %idx + store float %div, float* %arrayidx1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void +} + +; As test1 but esnure no transformation when we cannot prove the original ADD +; will become redundant, because it's used by a non-GEP. +define i64 @test2(float* %a, float* %b, i64 %disp) { +; CHECK-LABEL: @test2( +entry: + br label %for.body + +for.body: +; CHECK: for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK: %idx = add i64 %indvars.iv, %disp +; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 %idx + %idx = add i64 %indvars.iv, %disp + %arrayidx = getelementptr inbounds float, float* %a, i64 %idx + %0 = load float, float* %arrayidx + %div = fdiv float 1.500000e+00, %0 +; CHECK: %arrayidx1 = getelementptr inbounds float, float* %b, i64 %idx + %arrayidx1 = getelementptr inbounds float, float* %b, i64 %idx + store float %div, float* %arrayidx1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret i64 %idx +} + +; As test1 but esnure no transformation when we cannot prove the original ADD +; will become redundant, because the second GEP's ptr is not invariant. +define void @test3(float* %a, float* %b, i64 %disp) { +; CHECK-LABEL: @test3( +entry: + br label %for.body + +for.body: +; CHECK: for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK: %idx = add i64 %indvars.iv, %disp +; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 %idx + %idx = add i64 %indvars.iv, %disp + %arrayidx = getelementptr inbounds float, float* %a, i64 %idx + %0 = load float, float* %arrayidx + %div = fdiv float 1.500000e+00, %0 +; CHECK: %arrayidx1 = getelementptr inbounds float, float* %arrayidx, i64 %idx + %arrayidx1 = getelementptr inbounds float, float* %arrayidx, i64 %idx + store float %div, float* %arrayidx1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void +} + ; This would crash because we did not expect to be able to constant fold a GEP. define void @PR51485(<2 x i64> %v) {