Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2130,8 +2130,17 @@ } } + // Guard the gep(gep) fold so we don't create an add inside a loop + // when there wasn't an equivalent instruction there before. + bool DifferentLoops = false; + if (LI) + if (auto *GEPLoop = LI->getLoopFor(GEP.getParent())) + if (auto *SrcOpI = dyn_cast(Src)) + if (LI->getLoopFor(SrcOpI->getParent()) != GEPLoop) + DifferentLoops = true; + // Fold (gep(gep(Ptr,Idx0),Idx1) -> gep(Ptr,add(Idx0,Idx1)) - if (GO1->getType() == SO1->getType()) { + if (!DifferentLoops && GO1->getType() == SO1->getType()) { bool NewInBounds = GEP.isInBounds() && Src->isInBounds(); auto *NewIdx = Builder.CreateAdd(GO1, SO1, GEP.getName() + ".idx", Index: llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll =================================================================== --- llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll +++ llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll @@ -186,3 +186,48 @@ call void @blackhole(<2 x i8*> %e6) br label %loop } + +; Avoid folding the GEP outside the loop to inside, and increasing loop +; instruction count. +define float @gep_cross_loop(i64* %_arg_, float* %_arg_3, float %_arg_8) +; CHECK-LABEL: @gep_cross_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[_ARG_:%.*]], align 8 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[_ARG_3:%.*]], i64 [[TMP0]] +; CHECK-NEXT: br label [[FOR_COND_I:%.*]] +; CHECK: for.cond.i: +; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD11_I:%.*]], [[FOR_BODY_I:%.*]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I:%.*]], [[FOR_BODY_I]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 17 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_I]], label [[FOR_COND_I_I_I_PREHEADER:%.*]] +; CHECK: for.cond.i.i.i.preheader: +; CHECK-NEXT: ret float [[SUM]] +; CHECK: for.body.i: +; CHECK-NEXT: [[ARRAYIDX_I84_I:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 [[IDX]] +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_I84_I]], align 4 +; CHECK-NEXT: [[ADD_I]] = fadd fast float [[SUM]], [[TMP1]] +; CHECK-NEXT: [[ADD11_I]] = add nuw nsw i64 [[IDX]], 1 +; CHECK-NEXT: br label [[FOR_COND_I]] +; +{ +entry: + %0 = load i64, i64* %_arg_, align 8 + %add.ptr = getelementptr inbounds float, float* %_arg_3, i64 %0 + br label %for.cond.i + +for.cond.i: ; preds = %for.body.i, %entry + %idx = phi i64 [ 0, %entry ], [ %add11.i, %for.body.i ] + %sum = phi float [ 0.000000e+00, %entry ], [ %add.i, %for.body.i ] + %cmp = icmp ule i64 %idx, 16 + br i1 %cmp, label %for.body.i, label %for.cond.i.i.i.preheader + +for.cond.i.i.i.preheader: ; preds = %for.cond.i + ret float %sum + +for.body.i: ; preds = %for.cond.i + %arrayidx.i84.i = getelementptr inbounds float, float * %add.ptr, i64 %idx + %1 = load float, float* %arrayidx.i84.i, align 4 + %add.i = fadd fast float %sum, %1 + %add11.i = add nsw i64 %idx, 1 + br label %for.cond.i +}