diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -983,19 +983,35 @@
   return B.CreateElementCount(Ty, VF);
 }
 
-const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE) {
+const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
+                                Loop *OrigLoop) {
   const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
   assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && "Invalid loop count");
 
   ScalarEvolution &SE = *PSE.getSE();
+  unsigned BackEdgeSize = SE.getTypeSizeInBits(BackedgeTakenCount->getType());
+  unsigned IdxSize = IdxTy->getPrimitiveSizeInBits();
+
+  // If we need to zero extend the backedge count, check if we can
+  // add one to it prior to zero extending without overflow. Provided this is
+  // safe, it allows better simplification of the +1.
+  if (OrigLoop && BackEdgeSize < IdxSize &&
+      SE.isLoopEntryGuardedByCond(
+          OrigLoop, ICmpInst::ICMP_NE, BackedgeTakenCount,
+          SE.getNegativeSCEV(SE.getOne(BackedgeTakenCount->getType())))) {
+    return SE.getZeroExtendExpr(
+        SE.getAddExpr(BackedgeTakenCount,
+                      SE.getOne(BackedgeTakenCount->getType()), SCEV::FlagNUW),
+        IdxTy);
+  }
+
   // The exit count might have the type of i64 while the phi is i32. This can
   // happen if we have an induction variable that is sign extended before the
   // compare. The only way that we get a backedge taken count is that the
   // induction variable was signed and as such will not overflow. In such a case
   // truncation is legal.
-  if (SE.getTypeSizeInBits(BackedgeTakenCount->getType()) >
-      IdxTy->getPrimitiveSizeInBits())
+  if (BackEdgeSize > IdxSize)
     BackedgeTakenCount = SE.getTruncateOrNoop(BackedgeTakenCount, IdxTy);
 
   BackedgeTakenCount = SE.getNoopOrZeroExtend(BackedgeTakenCount, IdxTy);
@@ -2878,7 +2894,7 @@
   // Find the loop boundaries.
   Type *IdxTy = Legal->getWidestInductionType();
   assert(IdxTy && "No type for induction");
-  const SCEV *ExitCount = createTripCountSCEV(IdxTy, PSE);
+  const SCEV *ExitCount = createTripCountSCEV(IdxTy, PSE, OrigLoop);
 
   const DataLayout &DL = InsertBlock->getModule()->getDataLayout();
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -76,7 +76,8 @@
 Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                        int64_t Step);
 
-const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE);
+const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
+                                Loop *CurLoop = nullptr);
 
 /// A range of powers-of-2 vectorization factors with fixed start and
 /// adjustable end.
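To make the effect of the new fold concrete, here is a minimal before/after sketch of the emitted trip-count computation, distilled from the riscv-unroll.ll update below; the value names are illustrative (the FileCheck lines use [[TMP0]]/[[TMP2]] instead). The loop is guarded by "icmp sgt i32 %size, 0", so SCEV can prove the i32 backedge-taken count (%size - 1) is never -1, meaning the +1 can be performed in i32 with the nuw flag before the zero extend:

  ; Before: extend the backedge-taken count, then add 1 in the wide type.
  %btc    = add i32 %size, -1
  %btc.64 = zext i32 %btc to i64
  %tc     = add nuw nsw i64 %btc.64, 1
  ; After: zext(%btc + 1) folds to a single zero extend of %size.
  %tc     = zext i32 %size to i64

The same simplification removes the add/zext/add sequence from the preheader in several of the regenerated tests that follow.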
The range includes start and excludes end, e.g.,: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll @@ -11,33 +11,31 @@ ; LMUL1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0 ; LMUL1-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; LMUL1: for.body.preheader: -; LMUL1-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], -1 -; LMUL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; LMUL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; LMUL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 +; LMUL1-NEXT: [[TMP0:%.*]] = zext i32 [[SIZE]] to i64 +; LMUL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 ; LMUL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; LMUL1: vector.ph: -; LMUL1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 -; LMUL1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; LMUL1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8 +; LMUL1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; LMUL1-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL1: vector.body: ; LMUL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; LMUL1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; LMUL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]] +; LMUL1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; LMUL1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] +; LMUL1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 +; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 +; LMUL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP1]] ; LMUL1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 -; LMUL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]] -; LMUL1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; LMUL1-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 -; LMUL1-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] -; LMUL1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP3]] -; LMUL1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 -; LMUL1-NEXT: store <8 x i32> [[TMP8]], ptr [[TMP10]], align 4 +; LMUL1-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 +; LMUL1-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; LMUL1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP1]] +; LMUL1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; LMUL1-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP8]], align 4 ; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; LMUL1-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; LMUL1-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; LMUL1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; LMUL1-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL1: middle.block: -; LMUL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; LMUL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; LMUL1-NEXT: br i1 [[CMP_N]], label 
[[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; LMUL1: scalar.ph: ; LMUL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -45,10 +43,10 @@ ; LMUL1: for.body: ; LMUL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; LMUL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; LMUL1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; LMUL1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; LMUL1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; LMUL1-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; LMUL1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] +; LMUL1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; LMUL1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP10]] ; LMUL1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] ; LMUL1-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 ; LMUL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -65,33 +63,31 @@ ; LMUL2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0 ; LMUL2-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; LMUL2: for.body.preheader: -; LMUL2-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], -1 -; LMUL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; LMUL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; LMUL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 +; LMUL2-NEXT: [[TMP0:%.*]] = zext i32 [[SIZE]] to i64 +; LMUL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 ; LMUL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; LMUL2: vector.ph: -; LMUL2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 -; LMUL2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; LMUL2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8 +; LMUL2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; LMUL2-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL2: vector.body: ; LMUL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; LMUL2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; LMUL2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]] +; LMUL2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; LMUL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] +; LMUL2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 +; LMUL2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 +; LMUL2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP1]] ; LMUL2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; LMUL2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 -; LMUL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]] -; LMUL2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; LMUL2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 -; LMUL2-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] -; LMUL2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP3]] -; LMUL2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 -; LMUL2-NEXT: store <8 x i32> [[TMP8]], ptr [[TMP10]], align 4 +; LMUL2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 +; LMUL2-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; 
LMUL2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP1]] +; LMUL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; LMUL2-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP8]], align 4 ; LMUL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; LMUL2-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; LMUL2-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; LMUL2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; LMUL2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL2: middle.block: -; LMUL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; LMUL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; LMUL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; LMUL2: scalar.ph: ; LMUL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -99,10 +95,10 @@ ; LMUL2: for.body: ; LMUL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; LMUL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; LMUL2-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; LMUL2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; LMUL2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; LMUL2-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; LMUL2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] +; LMUL2-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; LMUL2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP10]] ; LMUL2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] ; LMUL2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 ; LMUL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -9,47 +9,46 @@ ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 3 ; CHECK-NEXT: br i1 [[TMP1]], label [[ITER_CHECK:%.*]], label [[DOT_CRIT_EDGE:%.*]] ; CHECK: iter.check: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -3 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 16 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP4]], 32 +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP3]], 32 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 32 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 32 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = add i64 3, [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, 
[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store <32 x i8> [[VEC_IND]], ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 +; CHECK-NEXT: store <32 x i8> [[VEC_IND]], ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i8> [[VEC_IND]], -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END5:%.*]] = add i64 3, [[N_VEC]] -; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP4]], [[N_VEC]] +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 16 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 3, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP4]], 16 -; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF2]] +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP3]], 16 +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF2]] ; CHECK-NEXT: [[IND_END4:%.*]] = add i64 3, [[N_VEC3]] -; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i8 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i8 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i8> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -57,25 +56,25 @@ ; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND9:%.*]] = phi <16 x i8> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX11:%.*]] = add i64 3, [[INDEX8]] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX11]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[A]], 
i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 -; CHECK-NEXT: store <16 x i8> [[VEC_IND9]], ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX11]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 +; CHECK-NEXT: store <16 x i8> [[VEC_IND9]], ptr [[TMP11]], align 1 ; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX8]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT10]] = add <16 x i8> [[VEC_IND9]], -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC3]] +; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N7]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ], [ 3, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV]] to i8 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i8 [[TMP14]], ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[INDVARS_IV]] to i8 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i8 [[TMP13]], ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] @@ -107,14 +106,13 @@ ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 9 ; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] ; CHECK: .lr.ph.preheader: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -10 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -9 ; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 8 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 8 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = add i64 9, [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -124,36 +122,36 @@ ; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <2 x i64> [[STEP_ADD]], ; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <2 x i64> [[STEP_ADD1]], ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 9, [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 
[[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[STEP_ADD1]], -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[TMP12:%.*]] = sitofp <2 x i64> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP13:%.*]] = sitofp <2 x i64> [[TMP9]] to <2 x float> ; CHECK-NEXT: [[TMP14:%.*]] = sitofp <2 x i64> [[TMP10]] to <2 x float> ; CHECK-NEXT: [[TMP15:%.*]] = sitofp <2 x i64> [[TMP11]] to <2 x float> -; CHECK-NEXT: [[TMP16:%.*]] = sitofp <2 x i64> [[TMP12]] to <2 x float> -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 0 +; CHECK-NEXT: store <2 x float> [[TMP12]], ptr [[TMP20]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 2 ; CHECK-NEXT: store <2 x float> [[TMP13]], ptr [[TMP21]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 2 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 4 ; CHECK-NEXT: store <2 x float> [[TMP14]], ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 4 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 6 ; CHECK-NEXT: store <2 x float> [[TMP15]], ptr [[TMP23]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 6 -; CHECK-NEXT: store <2 x float> [[TMP16]], ptr [[TMP24]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD2]], -; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 9, 
[[DOTLR_PH_PREHEADER]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll --- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -27,13 +27,12 @@ ; CHECK: for.body3.lr.ph.us.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[K:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[M]] to i64 ; CHECK-NEXT: br label [[FOR_BODY3_LR_PH_US:%.*]] ; CHECK: for.end.us: ; CHECK-NEXT: [[ARRAYIDX9_US:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV33:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access !0 -; CHECK-NEXT: [[ADD10_US:%.*]] = add nsw i32 [[TMP4]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[ADD10_US:%.*]] = add nsw i32 [[TMP3]], 3 ; CHECK-NEXT: store i32 [[ADD10_US]], ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access !0 ; CHECK-NEXT: [[INDVARS_IV_NEXT34:%.*]] = add i64 [[INDVARS_IV33]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV35:%.*]] = trunc i64 [[INDVARS_IV_NEXT34]] to i32 @@ -41,12 +40,12 @@ ; CHECK-NEXT: br i1 [[EXITCOND36]], label [[FOR_END15_LOOPEXIT:%.*]], label [[FOR_BODY3_LR_PH_US]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: for.body3.us: ; CHECK-NEXT: [[INDVARS_IV29:%.*]] = phi i64 [ [[BC_RESUME_VAL:%.*]], [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT30:%.*]], [[FOR_BODY3_US:%.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDVARS_IV29]] to i32 -; CHECK-NEXT: [[ADD4_US:%.*]] = add i32 [[ADD_US:%.*]], [[TMP5]] +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVARS_IV29]] to i32 +; CHECK-NEXT: [[ADD4_US:%.*]] = add i32 [[ADD_US:%.*]], [[TMP4]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD4_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4, !llvm.mem.parallel_loop_access !0 -; CHECK-NEXT: [[ADD5_US:%.*]] = add nsw i32 [[TMP6]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[ADD5_US:%.*]] = add nsw i32 [[TMP5]], 1 ; CHECK-NEXT: store i32 [[ADD5_US]], ptr [[ARRAYIDX7_US:%.*]], align 4, !llvm.mem.parallel_loop_access !0 ; CHECK-NEXT: [[INDVARS_IV_NEXT30]] = add i64 [[INDVARS_IV29]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV31:%.*]] = trunc i64 [[INDVARS_IV_NEXT30]] to i32 @@ -54,38 +53,38 @@ ; CHECK-NEXT: br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.body3.lr.ph.us: ; CHECK-NEXT: [[INDVARS_IV33]] = phi i64 [ [[INDVARS_IV_NEXT34]], [[FOR_END_US]] ], [ 0, [[FOR_BODY3_LR_PH_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP1]], [[INDVARS_IV33]] -; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32 -; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[INDVARS_IV33]] to i32 -; CHECK-NEXT: [[ADD_US]] = add i32 [[TMP9]], [[K]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP1]], [[INDVARS_IV33]] +; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[INDVARS_IV33]] to i32 +; CHECK-NEXT: [[ADD_US]] = add i32 [[TMP8]], [[K]] ; CHECK-NEXT: [[ARRAYIDX7_US]] = 
getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV33]] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[TMP10]], [[TMP8]] -; CHECK-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP7]], [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i32 [[TMP9]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[OFFSET_IDX]] to i32 -; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[ADD_US]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[TMP18]], i32 3 -; CHECK-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[OFFSET_IDX]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[ADD_US]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[TMP17]], i32 3 +; CHECK-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY3_LR_PH_US]] ], [ 0, [[VECTOR_SCEVCHECK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll 
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -74,7 +74,7 @@ ; UNROLL-NO-IC-NEXT: store i32 [[ADD35]], ptr [[ARRAYIDX34]], align 4 ; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; UNROLL-NO-IC: for.exit: ; UNROLL-NO-IC-NEXT: ret void ; @@ -130,7 +130,7 @@ ; UNROLL-NO-VF-NEXT: store i32 [[ADD35]], ptr [[ARRAYIDX34]], align 4 ; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; UNROLL-NO-VF: for.exit: ; UNROLL-NO-VF-NEXT: ret void ; @@ -185,7 +185,7 @@ ; SINK-AFTER-NEXT: store i32 [[ADD35]], ptr [[ARRAYIDX34]], align 4 ; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; SINK-AFTER: for.exit: ; SINK-AFTER-NEXT: ret void ; @@ -230,70 +230,68 @@ ; UNROLL-NO-IC: for.preheader: ; UNROLL-NO-IC-NEXT: [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 -1 ; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 +; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: -; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 -; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8 +; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD2:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]] -; 
UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 4 -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2]] = load <4 x i32>, ptr [[TMP8]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP9]] -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD2]], [[TMP10]] -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp sgt <4 x i32> [[TMP11]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = icmp sgt <4 x i32> [[TMP12]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i32> [[TMP11]], <4 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP15]] -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = icmp slt <4 x i32> [[VEC_PHI1]], [[TMP16]] -; UNROLL-NO-IC-NEXT: [[TMP19]] = select <4 x i1> [[TMP17]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP15]] -; UNROLL-NO-IC-NEXT: [[TMP20]] = select <4 x i1> [[TMP18]], <4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP16]] +; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]] +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 4 +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2]] = load <4 x i32>, ptr [[TMP6]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP7]] +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD2]], [[TMP8]] +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[TMP9]], zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[TMP10]], zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP9]], <4 x i32> zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP12]], <4 x i32> [[TMP10]], <4 x i32> zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP13]] +; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp slt <4 x i32> [[VEC_PHI1]], [[TMP14]] +; UNROLL-NO-IC-NEXT: [[TMP17]] = select <4 x i1> [[TMP15]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP13]] +; UNROLL-NO-IC-NEXT: [[TMP18]] = select <4 x i1> [[TMP16]], <4 x i32> [[VEC_PHI1]], <4 x i32> 
[[TMP14]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP19]], [[TMP20]] -; UNROLL-NO-IC-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP19]], <4 x i32> [[TMP20]] -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX_SELECT]]) -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP17]], [[TMP18]] +; UNROLL-NO-IC-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP17]], <4 x i32> [[TMP18]] +; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX_SELECT]]) +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 3 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] -; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: for.cond.cleanup.loopexit: -; UNROLL-NO-IC-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_COND_CLEANUP]] ; UNROLL-NO-IC: for.cond.cleanup: ; UNROLL-NO-IC-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[MINMAX_0_LCSSA]] ; UNROLL-NO-IC: scalar.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; UNROLL-NO-IC-NEXT: [[TMP23]] = load i32, ptr [[ARRAYIDX]], align 4 -; UNROLL-NO-IC-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP23]], [[SCALAR_RECUR]] +; UNROLL-NO-IC-NEXT: 
[[TMP21]] = load i32, ptr [[ARRAYIDX]], align 4 +; UNROLL-NO-IC-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP21]], [[SCALAR_RECUR]] ; UNROLL-NO-IC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0 ; UNROLL-NO-IC-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]] @@ -310,46 +308,44 @@ ; UNROLL-NO-VF: for.preheader: ; UNROLL-NO-VF-NEXT: [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 -1 ; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4 -; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2 +; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 +; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 ; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-VF: vector.ph: -; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2 -; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 +; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]] -; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] -; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -; UNROLL-NO-VF-NEXT: [[TMP8]] = load i32, ptr [[TMP6]], align 4 -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sub nsw i32 [[TMP7]], [[VECTOR_RECUR]] -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = sub nsw i32 [[TMP8]], [[TMP7]] -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], 0 -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP10]], 0 -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 0 -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = select i1 [[TMP12]], i32 [[TMP10]], i32 0 -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = icmp slt i32 [[VEC_PHI]], [[TMP13]] -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = icmp slt i32 [[VEC_PHI1]], [[TMP14]] -; UNROLL-NO-VF-NEXT: [[TMP17]] = select i1 [[TMP15]], i32 [[VEC_PHI]], i32 [[TMP13]] -; UNROLL-NO-VF-NEXT: [[TMP18]] = select i1 [[TMP16]], i32 [[VEC_PHI1]], i32 [[TMP14]] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 +; UNROLL-NO-VF-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]] +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP6]] = load i32, ptr [[TMP4]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sub nsw i32 [[TMP5]], [[VECTOR_RECUR]] +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sub nsw i32 [[TMP6]], [[TMP5]] +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 0 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], 0 +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i32 [[TMP7]], i32 0 +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 0 +; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = icmp slt i32 [[VEC_PHI]], [[TMP11]] +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp slt i32 [[VEC_PHI1]], [[TMP12]] +; UNROLL-NO-VF-NEXT: [[TMP15]] = select i1 [[TMP13]], i32 [[VEC_PHI]], i32 [[TMP11]] +; UNROLL-NO-VF-NEXT: [[TMP16]] = select i1 [[TMP14]], i32 [[VEC_PHI1]], i32 [[TMP12]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL-NO-VF: middle.block: -; UNROLL-NO-VF-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] -; UNROLL-NO-VF-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP17]], i32 [[TMP18]] -; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP15]], [[TMP16]] +; UNROLL-NO-VF-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP15]], i32 [[TMP16]] +; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] @@ -360,12 +356,12 @@ ; UNROLL-NO-VF-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] ; UNROLL-NO-VF-NEXT: ret i32 [[MINMAX_0_LCSSA]] ; UNROLL-NO-VF: scalar.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 
[[INDVARS_IV]] -; UNROLL-NO-VF-NEXT: [[TMP20]] = load i32, ptr [[ARRAYIDX]], align 4 -; UNROLL-NO-VF-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP20]], [[SCALAR_RECUR]] +; UNROLL-NO-VF-NEXT: [[TMP18]] = load i32, ptr [[ARRAYIDX]], align 4 +; UNROLL-NO-VF-NEXT: [[SUB3:%.*]] = sub nsw i32 [[TMP18]], [[SCALAR_RECUR]] ; UNROLL-NO-VF-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0 ; UNROLL-NO-VF-NEXT: [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0 ; UNROLL-NO-VF-NEXT: [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]] @@ -373,7 +369,7 @@ ; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; ; SINK-AFTER-LABEL: @recurrence_2( ; SINK-AFTER-NEXT: entry: @@ -382,57 +378,55 @@ ; SINK-AFTER: for.preheader: ; SINK-AFTER-NEXT: [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 -1 ; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i32, ptr [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4 -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; SINK-AFTER-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; SINK-AFTER-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 +; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 ; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SINK-AFTER: vector.ph: -; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 -; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3 ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]] -; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i32>, ptr [[TMP5]], align 4 -; SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP7:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP6]] -; SINK-AFTER-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[TMP7]], zeroinitializer -; SINK-AFTER-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> zeroinitializer -; SINK-AFTER-NEXT: [[TMP10:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP9]] -; SINK-AFTER-NEXT: [[TMP11]] = select <4 x i1> [[TMP10]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP9]] +; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] 
], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; SINK-AFTER-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]]
+; SINK-AFTER-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
+; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, ptr [[TMP3]], align 4
+; SINK-AFTER-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32>
+; SINK-AFTER-NEXT:    [[TMP5:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP4]]
+; SINK-AFTER-NEXT:    [[TMP6:%.*]] = icmp sgt <4 x i32> [[TMP5]], zeroinitializer
+; SINK-AFTER-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP5]], <4 x i32> zeroinitializer
+; SINK-AFTER-NEXT:    [[TMP8:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP7]]
+; SINK-AFTER-NEXT:    [[TMP9]] = select <4 x i1> [[TMP8]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP7]]
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SINK-AFTER-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; SINK-AFTER-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SINK-AFTER-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; SINK-AFTER:       middle.block:
-; SINK-AFTER-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP11]])
-; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; SINK-AFTER-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP9]])
+; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
 ; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; SINK-AFTER:       scalar.ph:
 ; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
 ; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
-; SINK-AFTER-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
+; SINK-AFTER-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
 ; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; SINK-AFTER:       for.cond.cleanup.loopexit:
-; SINK-AFTER-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
+; SINK-AFTER-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
 ; SINK-AFTER-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; SINK-AFTER:       for.cond.cleanup:
 ; SINK-AFTER-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
 ; SINK-AFTER-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
 ; SINK-AFTER:       scalar.body:
-; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[SCALAR_BODY]] ]
+; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[SCALAR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
-; SINK-AFTER-NEXT:    [[TMP14]] = load i32, ptr [[ARRAYIDX]], align 4
-; SINK-AFTER-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP14]], [[SCALAR_RECUR]]
+; SINK-AFTER-NEXT:    [[TMP12]] = load i32, ptr [[ARRAYIDX]], align 4
+; SINK-AFTER-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP12]], [[SCALAR_RECUR]]
 ; SINK-AFTER-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
 ; SINK-AFTER-NEXT:    [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
 ; SINK-AFTER-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
@@ -498,14 +492,13 @@
 ; UNROLL-NO-IC-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
 ; UNROLL-NO-IC-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; UNROLL-NO-IC:       for.preheader:
-; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -2
+; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -1
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
-; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
-; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
+; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
 ; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-IC:       vector.ph:
-; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
-; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
+; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
+; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
 ; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i64 1, [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
@@ -517,35 +510,35 @@
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
-; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
-; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
+; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 4
+; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP3]]
 ; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP4]]
-; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP5]]
-; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0
-; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP8]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 4
-; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP9]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
-; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = sitofp <4 x i16> [[WIDE_LOAD1]] to <4 x double>
+; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
+; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 4
+; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP8]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
+; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sitofp <4 x i16> [[WIDE_LOAD1]] to <4 x double>
+; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = sitofp <4 x i16> [[TMP9]] to <4 x double>
 ; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = sitofp <4 x i16> [[TMP10]] to <4 x double>
-; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = sitofp <4 x i16> [[TMP11]] to <4 x double>
-; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = fmul fast <4 x double> [[TMP14]], [[BROADCAST_SPLAT]]
-; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = fmul fast <4 x double> [[TMP15]], [[BROADCAST_SPLAT3]]
+; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = fmul fast <4 x double> [[TMP13]], [[BROADCAST_SPLAT]]
+; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = fmul fast <4 x double> [[TMP14]], [[BROADCAST_SPLAT3]]
+; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = fsub fast <4 x double> [[TMP11]], [[TMP15]]
 ; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = fsub fast <4 x double> [[TMP12]], [[TMP16]]
-; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = fsub fast <4 x double> [[TMP13]], [[TMP17]]
+; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP3]]
 ; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP4]]
-; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP5]]
-; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 0
+; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds double, ptr [[TMP19]], i32 0
+; UNROLL-NO-IC-NEXT:    store <4 x double> [[TMP17]], ptr [[TMP21]], align 8
+; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds double, ptr [[TMP19]], i32 4
 ; UNROLL-NO-IC-NEXT:    store <4 x double> [[TMP18]], ptr [[TMP22]], align 8
-; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 4
-; UNROLL-NO-IC-NEXT:    store <4 x double> [[TMP19]], ptr [[TMP23]], align 8
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-IC-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
-; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
+; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 2
 ; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -554,11 +547,11 @@
 ; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ]
 ; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; UNROLL-NO-IC:       scalar.body:
-; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP25:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP24:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[ADVARS_IV]]
-; UNROLL-NO-IC-NEXT:    [[TMP25]] = load i16, ptr [[ARRAYIDX5]], align 2
-; UNROLL-NO-IC-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP25]] to double
+; UNROLL-NO-IC-NEXT:    [[TMP24]] = load i16, ptr [[ARRAYIDX5]], align 2
+; UNROLL-NO-IC-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP24]] to double
 ; UNROLL-NO-IC-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
 ; UNROLL-NO-IC-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
 ; UNROLL-NO-IC-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
@@ -585,54 +578,53 @@
 ; UNROLL-NO-VF-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; UNROLL-NO-VF:       for.preheader:
-; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -2
+; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -1
 ; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
-; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
-; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
+; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
 ; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-VF:       vector.ph:
-; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
-; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
+; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
+; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
 ; UNROLL-NO-VF-NEXT:    [[IND_END:%.*]] = add i64 1, [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
-; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP3]]
 ; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP4]]
-; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP5]]
-; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = load i16, ptr [[TMP6]], align 2
-; UNROLL-NO-VF-NEXT:    [[TMP9]] = load i16, ptr [[TMP7]], align 2
+; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 2
+; UNROLL-NO-VF-NEXT:    [[TMP8]] = load i16, ptr [[TMP6]], align 2
+; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = sitofp i16 [[TMP7]] to double
 ; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = sitofp i16 [[TMP8]] to double
-; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = sitofp i16 [[TMP9]] to double
-; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = sitofp i16 [[VECTOR_RECUR]] to double
-; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = sitofp i16 [[TMP8]] to double
+; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = sitofp i16 [[VECTOR_RECUR]] to double
+; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = sitofp i16 [[TMP7]] to double
+; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = fmul fast double [[TMP11]], [[CONV1]]
 ; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = fmul fast double [[TMP12]], [[CONV1]]
-; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = fmul fast double [[TMP13]], [[CONV1]]
+; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = fsub fast double [[TMP9]], [[TMP13]]
 ; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = fsub fast double [[TMP10]], [[TMP14]]
-; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = fsub fast double [[TMP11]], [[TMP15]]
+; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP3]]
 ; UNROLL-NO-VF-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP4]]
-; UNROLL-NO-VF-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP5]]
+; UNROLL-NO-VF-NEXT:    store double [[TMP15]], ptr [[TMP17]], align 8
 ; UNROLL-NO-VF-NEXT:    store double [[TMP16]], ptr [[TMP18]], align 8
-; UNROLL-NO-VF-NEXT:    store double [[TMP17]], ptr [[TMP19]], align 8
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
-; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; UNROLL-NO-VF:       scalar.body:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[ADVARS_IV]]
-; UNROLL-NO-VF-NEXT:    [[TMP21]] = load i16, ptr [[ARRAYIDX5]], align 2
-; UNROLL-NO-VF-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP21]] to double
+; UNROLL-NO-VF-NEXT:    [[TMP20]] = load i16, ptr [[ARRAYIDX5]], align 2
+; UNROLL-NO-VF-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP20]] to double
 ; UNROLL-NO-VF-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
 ; UNROLL-NO-VF-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
 ; UNROLL-NO-VF-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
@@ -641,7 +633,7 @@
 ; UNROLL-NO-VF-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
 ; UNROLL-NO-VF-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
-; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; UNROLL-NO-VF:       for.end.loopexit:
 ; UNROLL-NO-VF-NEXT:    br label [[FOR_END]]
 ; UNROLL-NO-VF:       for.end:
@@ -659,14 +651,13 @@
 ; SINK-AFTER-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
 ; SINK-AFTER-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; SINK-AFTER:       for.preheader:
-; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -2
+; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -1
 ; SINK-AFTER-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
-; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
-; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
+; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
 ; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SINK-AFTER:       vector.ph:
-; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
-; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
+; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
 ; SINK-AFTER-NEXT:    [[IND_END:%.*]] = add i64 1, [[N_VEC]]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
 ; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
@@ -676,23 +667,23 @@
 ; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
-; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
-; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP4]]
-; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
-; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP6]], align 2
-; SINK-AFTER-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32>
-; SINK-AFTER-NEXT:    [[TMP8:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
-; SINK-AFTER-NEXT:    [[TMP9:%.*]] = sitofp <4 x i16> [[TMP7]] to <4 x double>
-; SINK-AFTER-NEXT:    [[TMP10:%.*]] = fmul fast <4 x double> [[TMP9]], [[BROADCAST_SPLAT]]
-; SINK-AFTER-NEXT:    [[TMP11:%.*]] = fsub fast <4 x double> [[TMP8]], [[TMP10]]
-; SINK-AFTER-NEXT:    [[TMP12:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP4]]
-; SINK-AFTER-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i32 0
-; SINK-AFTER-NEXT:    store <4 x double> [[TMP11]], ptr [[TMP13]], align 8
+; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; SINK-AFTER-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP3]]
+; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
+; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP5]], align 2
+; SINK-AFTER-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32>
+; SINK-AFTER-NEXT:    [[TMP7:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
+; SINK-AFTER-NEXT:    [[TMP8:%.*]] = sitofp <4 x i16> [[TMP6]] to <4 x double>
+; SINK-AFTER-NEXT:    [[TMP9:%.*]] = fmul fast <4 x double> [[TMP8]], [[BROADCAST_SPLAT]]
+; SINK-AFTER-NEXT:    [[TMP10:%.*]] = fsub fast <4 x double> [[TMP7]], [[TMP9]]
+; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP3]]
+; SINK-AFTER-NEXT:    [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP11]], i32 0
+; SINK-AFTER-NEXT:    store <4 x double> [[TMP10]], ptr [[TMP12]], align 8
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SINK-AFTER-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; SINK-AFTER-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SINK-AFTER-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; SINK-AFTER:       middle.block:
-; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
+; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
 ; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -701,11 +692,11 @@
 ; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ]
 ; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; SINK-AFTER:       scalar.body:
-; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[SCALAR_BODY]] ]
+; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[SCALAR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[ADVARS_IV]]
-; SINK-AFTER-NEXT:    [[TMP15]] = load i16, ptr [[ARRAYIDX5]], align 2
-; SINK-AFTER-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP15]] to double
+; SINK-AFTER-NEXT:    [[TMP14]] = load i16, ptr [[ARRAYIDX5]], align 2
+; SINK-AFTER-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP14]] to double
 ; SINK-AFTER-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
 ; SINK-AFTER-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
 ; SINK-AFTER-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
@@ -980,18 +971,18 @@
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[E_015]], [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[E_015]], [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[I_016]], [[INDEX]]
-; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
-; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
+; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP2]] = add i32 [[OFFSET_IDX]], -1
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[FOR_COND1:%.*]]
 ; UNROLL-NO-VF:       for.cond.cleanup:
@@ -1002,9 +993,9 @@
 ; UNROLL-NO-VF-NEXT:    [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; UNROLL-NO-VF-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
 ; UNROLL-NO-VF-NEXT:    [[DEC]] = add nsw i32 [[K_0]], -1
-; UNROLL-NO-VF-NEXT:    br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP8:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]]
 ; UNROLL-NO-VF:       for.cond.cleanup3:
-; UNROLL-NO-VF-NEXT:    [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[INC]] = add nuw nsw i32 [[I_016]], 1
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
 ; UNROLL-NO-VF-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
@@ -1199,7 +1190,7 @@
 ; UNROLL-NO-VF-NEXT:    [[TMP10]] = load i32, ptr [[TMP8]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -1214,7 +1205,7 @@
 ; UNROLL-NO-VF-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_NEXT]]
 ; UNROLL-NO-VF-NEXT:    [[VAR2]] = load i32, ptr [[VAR1]], align 4
 ; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
-; UNROLL-NO-VF-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
 ; UNROLL-NO-VF-NEXT:    ret void
 ;
@@ -1338,7 +1329,7 @@
 ; UNROLL-NO-VF-NEXT:    [[TMP1]] = add i64 0, 1
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -1352,7 +1343,7 @@
 ; UNROLL-NO-VF-NEXT:    [[VAR3]] = add i64 0, 1
 ; UNROLL-NO-VF-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef
-; UNROLL-NO-VF-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
 ; UNROLL-NO-VF-NEXT:    ret void
 ;
@@ -1464,7 +1455,7 @@
 ; UNROLL-NO-VF-NEXT:    [[TMP3]] = add i32 [[TMP1]], [[X]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 96, 96
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -1479,7 +1470,7 @@
 ; UNROLL-NO-VF-NEXT:    [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
 ; UNROLL-NO-VF-NEXT:    [[ADDX]] = add i32 [[INC_PHI]], [[X]]
 ; UNROLL-NO-VF-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
-; UNROLL-NO-VF-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
 ; UNROLL-NO-VF-NEXT:    [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    ret i32 [[VAL_PHI_LCSSA]]
@@ -1695,7 +1686,7 @@
 ; UNROLL-NO-VF-NEXT:    [[TMP15]] = add i32 [[VEC_PHI3]], [[TMP13]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP14]]
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10240, 10240
@@ -1723,7 +1714,7 @@
 ; UNROLL-NO-VF-NEXT:    [[INC1]] = add nuw nsw i32 [[I_011]], 1
 ; UNROLL-NO-VF-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 25
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
-; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ;
 ; SINK-AFTER-LABEL: @PR33613(
 ; SINK-AFTER-NEXT:  entry:
@@ -1930,7 +1921,7 @@
 ; UNROLL-NO-VF-NEXT:    store i32 [[TMP13]], ptr [[TMP15]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -1950,7 +1941,7 @@
 ; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
 ; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
 ; UNROLL-NO-VF-NEXT:    ret void
 ;
@@ -2175,7 +2166,7 @@
 ; UNROLL-NO-VF-NEXT:    store i32 [[TMP13]], ptr [[TMP15]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -2197,7 +2188,7 @@
 ; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
 ; UNROLL-NO-VF-NEXT:    ret void
 ;
@@ -2405,7 +2396,7 @@
 ; UNROLL-NO-VF-NEXT:    store i32 [[TMP15]], ptr [[TMP17]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -2426,7 +2417,7 @@
 ; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
 ; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
 ; UNROLL-NO-VF-NEXT:    ret void
 ;
@@ -2683,7 +2674,7 @@
 ; UNROLL-NO-VF-NEXT:    [[TMP7]] = add i16 [[TMP3]], 5
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 42
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 43, 42
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -2701,7 +2692,7 @@
 ; UNROLL-NO-VF-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
 ; UNROLL-NO-VF-NEXT:    [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
 ; UNROLL-NO-VF-NEXT:    [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
-; UNROLL-NO-VF-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
 ; UNROLL-NO-VF-NEXT:    ret void
 ;
@@ -2943,7 +2934,7 @@
 ; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = select i1 [[TMP3]], i32 [[TMP11]], i32 [[VEC_PHI1]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF25:![0-9]+]], !llvm.loop [[LOOP26:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF26:![0-9]+]], !llvm.loop [[LOOP27:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]]
 ; UNROLL-NO-VF-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
@@ -2963,7 +2954,7 @@
 ; UNROLL-NO-VF-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
 ; UNROLL-NO-VF-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
 ; UNROLL-NO-VF-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
-; UNROLL-NO-VF-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF27:![0-9]+]], !llvm.loop [[LOOP28:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF28:![0-9]+]], !llvm.loop [[LOOP29:![0-9]+]]
 ;
 ; SINK-AFTER-LABEL: @sink_into_replication_region(
 ; SINK-AFTER-NEXT:  bb:
@@ -3332,7 +3323,7 @@
 ; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = select i1 [[TMP5]], i32 [[TMP11]], i32 [[VEC_PHI2]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF25]], !llvm.loop [[LOOP29:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF26]], !llvm.loop [[LOOP30:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP17]], [[TMP16]]
 ; UNROLL-NO-VF-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
@@ -3357,7 +3348,7 @@
 ; UNROLL-NO-VF-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
 ; UNROLL-NO-VF-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
 ; UNROLL-NO-VF-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
-; UNROLL-NO-VF-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF27]], !llvm.loop [[LOOP30:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF28]], !llvm.loop [[LOOP31:![0-9]+]]
 ;
 ; SINK-AFTER-LABEL: @sink_into_replication_region_multiple(
 ; SINK-AFTER-NEXT:  bb:
@@ -3589,7 +3580,7 @@
 ; UNROLL-NO-VF-NEXT:    store i32 0, ptr [[TMP9]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 16, 16
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -3610,7 +3601,7 @@
 ; UNROLL-NO-VF-NEXT:    [[EXT:%.*]] = zext i1 [[B3]] to i32
 ; UNROLL-NO-VF-NEXT:    [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]]
 ; UNROLL-NO-VF-NEXT:    store i32 0, ptr [[A_GEP]], align 4
-; UNROLL-NO-VF-NEXT:    br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP32:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
 ; UNROLL-NO-VF-NEXT:    ret void
 ;