Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -5650,6 +5650,45 @@
     DeadInsts.emplace_back(OperandIsInstr);
 }
 
+/// Return true if every user of \p V is an instruction that lives in \p BB.
+static bool isUserAllWithinBB(Value *V, BasicBlock *BB) {
+  for (User *U : V->users()) {
+    Instruction *UI = cast<Instruction>(U);
+    if (UI->getParent() != BB)
+      return false;
+  }
+  return true;
+}
+
+/// Return true if the IVInc should be placed in the loop header instead of
+/// \p IVIncInsertPos: the use is an address, all users of the replaced operand
+/// are in the header, and the target has a legal post-indexed load/store.
+static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
+                          const LSRUse &LU, Instruction *IVIncInsertPos,
+                          Loop *L) {
+  if (LU.Kind != LSRUse::Address)
+    return false;
+
+  // Conservative for now: only hoist into the header, which helps the backend
+  // form post-indexed accesses when the latch is a different block. Later we
+  // can hoist the IVInc to a block that post-dominates all users.
+  BasicBlock *LHeader = L->getHeader();
+  if (IVIncInsertPos->getParent() == LHeader)
+    return false;
+
+  if (!Fixup.OperandValToReplace ||
+      !isUserAllWithinBB(Fixup.OperandValToReplace, LHeader))
+    return false;
+
+  // A store's own type is void, so query legality with the stored value type.
+  if (auto *LI = dyn_cast<LoadInst>(Fixup.UserInst))
+    return TTI.isIndexedLoadLegal(TTI.MIM_PostInc, LI->getType());
+  if (auto *SI = dyn_cast<StoreInst>(Fixup.UserInst))
+    return TTI.isIndexedStoreLegal(TTI.MIM_PostInc,
+                                   SI->getValueOperand()->getType());
+  return false;
+}
+
 /// Rewrite all the fixup locations with new values, following the chosen
 /// solution.
 void LSRInstance::ImplementSolution(
@@ -5658,8 +5697,6 @@
   // we can remove them after we are done working.
   SmallVector DeadInsts;
 
-  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
-
   // Mark phi nodes that terminate chains so the expander tries to reuse them.
for (const IVChain &Chain : IVChainVec) { if (PHINode *PN = dyn_cast(Chain.tailUserInst())) @@ -5669,6 +5706,11 @@ // Expand the new value definitions and update the users. for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) { + Instruction *InsertPos = + canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L) + ? L->getHeader()->getTerminator() + : IVIncInsertPos; + Rewriter.setIVIncInsertPos(L, InsertPos); Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts); Changed = true; } Index: llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll =================================================================== --- llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll +++ llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll @@ -13,11 +13,10 @@ ; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: .LBB0_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: ldr w9, [x1], #4 ; CHECK-NEXT: cbnz w9, .LBB0_5 ; CHECK-NEXT: // %bb.3: // %for.cond ; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: add x1, x1, #4 ; CHECK-NEXT: subs x8, x8, #1 ; CHECK-NEXT: b.ne .LBB0_2 ; CHECK-NEXT: .LBB0_4: Index: llvm/test/Transforms/LoopStrengthReduce/ivincs-hoist.ll =================================================================== --- llvm/test/Transforms/LoopStrengthReduce/ivincs-hoist.ll +++ llvm/test/Transforms/LoopStrengthReduce/ivincs-hoist.ll @@ -1,10 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-reduce -S < %s | FileCheck %s +target triple = "aarch64-unknown-linux-gnu" target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128" -define i32 @test(i32 %c, ptr %a, ptr %b) { -; CHECK-LABEL: @test( +define i32 @IVIncHoist(i32 %c, ptr %a, ptr %b) { +; CHECK-LABEL: @IVIncHoist( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[C:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP13]], 
label [[FOR_BODY_PREHEADER:%.*]], label [[RETURN:%.*]] @@ -12,15 +13,15 @@ ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[C]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[LSR_IV:%.*]], i64 4 ; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = add nsw i64 [[LSR_IV1:%.*]], -1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[RETURN_LOOPEXIT:%.*]], label [[FOR_BODY]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1]] = phi i64 [ [[LSR_IV_NEXT]], [[FOR_COND:%.*]] ], [ [[WIDE_TRIP_COUNT]], [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[LSR_IV]] = phi ptr [ [[UGLYGEP]], [[FOR_COND]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[UGLYGEP:%.*]], [[FOR_COND]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[LSR_IV]], align 4 ; CHECK-NEXT: [[TOBOOL3_NOT:%.*]] = icmp eq i32 [[VAL]], 0 +; CHECK-NEXT: [[UGLYGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 4 ; CHECK-NEXT: br i1 [[TOBOOL3_NOT]], label [[FOR_COND]], label [[RETURN_LOOPEXIT]] ; CHECK: return.loopexit: ; CHECK-NEXT: [[RETVAL_1_PH:%.*]] = phi i32 [ 1, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ] @@ -53,3 +54,80 @@ %retval.1 = phi i32 [ 0, %entry ], [ 0, %for.cond ], [ 1, %for.body ] ret i32 %retval.1 } + +; negative case: %arrayidx.b is not in header + +define i64 @IVIncHoist_not_all_user_in_header(i32 %c, ptr %a, ptr %b) { +; CHECK-LABEL: @IVIncHoist_not_all_user_in_header( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[C:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP13]], label [[FOR_BODY_PREHEADER:%.*]], label [[RETURN:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[C]] to i64 +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4 +; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond: +; 
CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_NEXT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[RETURN_LOOPEXITSPLIT:%.*]], label [[FOR_BODY]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 2 +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP0]] +; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr [[UGLYGEP1]], align 4 +; CHECK-NEXT: [[TOBOOL3_NOT:%.*]] = icmp eq i32 [[VAL_A]], 0 +; CHECK-NEXT: br i1 [[TOBOOL3_NOT]], label [[IF_THEN:%.*]], label [[RETURN_LOOPEXITSPLIT]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 2 +; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[UGLYGEP2]], i64 [[TMP1]] +; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr [[UGLYGEP3]], align 4 +; CHECK-NEXT: [[TOBOOL4_NOT:%.*]] = icmp eq i32 [[VAL_B]], 0 +; CHECK-NEXT: br i1 [[TOBOOL4_NOT]], label [[FOR_COND]], label [[IF_THEN_RETURN_LOOPEXIT_CRIT_EDGE:%.*]] +; CHECK: return.loopexitsplit: +; CHECK-NEXT: [[RETVAL_1_PH_PH:%.*]] = phi i64 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] +; CHECK-NEXT: br label [[RETURN_LOOPEXIT:%.*]] +; CHECK: if.then.return.loopexit_crit_edge: +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDVARS_IV]], 3 +; CHECK-NEXT: br label [[RETURN_LOOPEXIT]] +; CHECK: return.loopexit: +; CHECK-NEXT: [[RETVAL_1_PH:%.*]] = phi i64 [ [[TMP2]], [[IF_THEN_RETURN_LOOPEXIT_CRIT_EDGE]] ], [ [[RETVAL_1_PH_PH]], [[RETURN_LOOPEXITSPLIT]] ] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[RETVAL_1_PH]], [[RETURN_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[RETVAL_1]] +; +entry: + %cmp13 = icmp sgt i32 %c, 0 + br i1 %cmp13, label %for.body.preheader, label %return + +for.body.preheader: ; preds = %entry + 
%wide.trip.count = zext i32 %c to i64 + br label %for.body + +for.cond: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %return, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.cond ] + %indvars.iv.a = phi i64 [ 1, %for.body.preheader ], [ %indvars.iv.next.a, %for.cond ] + %indvars.iv.b = phi i64 [ 2, %for.body.preheader ], [ %indvars.iv.next.b, %for.cond ] + %arrayidx.a = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.a + %indvars.iv.next.a = add nuw nsw i64 %indvars.iv.a, 1 + %val.a = load i32, ptr %arrayidx.a, align 4 + %tobool3.not = icmp eq i32 %val.a, 0 + br i1 %tobool3.not, label %if.then, label %return + +if.then: + %arrayidx.b = getelementptr inbounds i32, ptr %b, i64 %indvars.iv.b + %indvars.iv.next.b = add nuw nsw i64 %indvars.iv.b, 1 + %val.b = load i32, ptr %arrayidx.b, align 4 + %tobool4.not = icmp eq i32 %val.b, 0 + br i1 %tobool4.not, label %for.cond, label %return + +return: ; preds = %for.cond, %for.body, %entry + %retval.1 = phi i64 [ 0, %entry ], [ 0, %for.cond ], [ 1, %for.body ], [ %indvars.iv.next.b, %if.then ] + ret i64 %retval.1 +}