Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2130,8 +2130,21 @@
         }
       }
 
+      // Guard the gep(gep) fold so we don't create an add inside a loop
+      // when there wasn't an equivalent instruction there before.
+      // DifferentLoops is set only when LoopInfo is available, the outer GEP
+      // sits in a loop, and Src is an instruction whose getLoopFor() result
+      // differs from the outer GEP's. A Src that is not an instruction never
+      // blocks the fold.
+      bool DifferentLoops = false;
+      if (LI)
+        if (auto *GEPLoop = LI->getLoopFor(GEP.getParent()))
+          if (auto *SrcOpI = dyn_cast<Instruction>(Src))
+            if (LI->getLoopFor(SrcOpI->getParent()) != GEPLoop)
+              DifferentLoops = true;
+
       // Fold (gep(gep(Ptr,Idx0),Idx1) -> gep(Ptr,add(Idx0,Idx1))
-      if (GO1->getType() == SO1->getType()) {
+      if (!DifferentLoops && GO1->getType() == SO1->getType()) {
         bool NewInBounds = GEP.isInBounds() && Src->isInBounds();
         auto *NewIdx =
             Builder.CreateAdd(GO1, SO1, GEP.getName() + ".idx",
Index: llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll
===================================================================
--- llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll
+++ llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll
@@ -186,3 +186,29 @@
   call void @blackhole(<2 x i8*> %e6)
   br label %loop
 }
+
+; Do not fold the GEP defined outside the loop into the GEP inside the loop:
+; that would increase the loop's instruction count.
+define float @gep_cross_loop(i64* %_arg_, float* %_arg_3, float %_arg_8)
+{
+entry:
+  %0 = load i64, i64* %_arg_, align 8
+  %add.ptr = getelementptr inbounds float, float* %_arg_3, i64 %0 ; outer GEP, computed once in %entry before the loop
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %for.body.i, %entry
+  %idx = phi i64 [ 0, %entry ], [ %add11.i, %for.body.i ] ; index: starts at 0, advanced by 1 in %for.body.i
+  %sum = phi float [ 0.000000e+00, %entry ], [ %add.i, %for.body.i ] ; running sum carried around the loop
+  %cmp = icmp ule i64 %idx, 16
+  br i1 %cmp, label %for.body.i, label %for.cond.i.i.i.preheader
+
+for.cond.i.i.i.preheader:                         ; preds = %for.cond.i
+  ret float %sum
+
+for.body.i:                                       ; preds = %for.cond.i
+  %arrayidx.i84.i = getelementptr inbounds float, float * %add.ptr, i64 %idx ; GEP inside the loop whose base is the GEP from %entry
+  %1 = load float, float* %arrayidx.i84.i, align 4
+  %add.i = fadd fast float %sum, %1
+  %add11.i = add nsw i64 %idx, 1
+  br label %for.cond.i
+}