Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1446,21 +1446,15 @@ // \p VF is the vectorization factor that will be used to vectorize \p I. // Superset of instructions that return true for isScalarWithPredication. bool isPredicatedInst(Instruction *I, ElementCount VF) { - // When we know the load's address is loop invariant and the instruction - // in the original scalar loop was unconditionally executed then we - // don't need to mark it as a predicated instruction. Tail folding may - // introduce additional predication, but we're guaranteed to always have - // at least one active lane. We call Legal->blockNeedsPredication here - // because it doesn't query tail-folding. - if (Legal->isUniformMemOp(*I) && isa<LoadInst>(I) && - !Legal->blockNeedsPredication(I->getParent())) - return false; if (!blockNeedsPredicationForAnyReason(I->getParent())) return false; // Loads and stores that need some form of masked operation are predicated - // instructions. + // instructions. However, for uniform memory ops we know the address isn't + // changing. Since tail folding guarantees at least one active lane, we + // can discount predication inserted solely for that purpose. if (isa<LoadInst>(I) || isa<StoreInst>(I)) - return Legal->isMaskRequired(I); + return Legal->isUniformMemOp(*I) ? 
Legal->blockNeedsPredication(I->getParent()) : + Legal->isMaskRequired(I); return isScalarWithPredication(I, VF); } Index: llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -90,12 +90,14 @@ ; FORCE: vector.body: ; FORCE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE4:%.*]] ] ; FORCE-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE4]] ] +; FORCE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; FORCE-NEXT: [[TMP2:%.*]] = icmp ule <2 x i32> [[VEC_IND]], +; FORCE-NEXT: store i32 [[TMP0]], i32* @b, align 1 +; FORCE-NEXT: store i32 [[TMP1]], i32* @b, align 1 ; FORCE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; FORCE-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; FORCE: pred.load.if: -; FORCE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; FORCE-NEXT: store i32 [[TMP0]], i32* @b, align 1 ; FORCE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @a, i32 0, i32 [[TMP0]] ; FORCE-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 1 ; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE]] @@ -104,8 +106,6 @@ ; FORCE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 ; FORCE-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4]] ; FORCE: pred.load.if1: -; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; FORCE-NEXT: store i32 [[TMP1]], i32* @b, align 1 ; FORCE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @a, i32 0, i32 [[TMP1]] ; FORCE-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 1 ; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE4]] Index: 
llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -216,49 +216,49 @@ ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; CHECK: pred.store.if: +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK: pred.load.if: ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 -; CHECK-NEXT: store i64 [[TMP4]], i64* [[B:%.*]], align 8 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] -; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_STORE_IF]] ] +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK: pred.load.continue: +; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] -; CHECK: 
pred.store.if1: +; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK: pred.load.if1: ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8 -; CHECK-NEXT: store i64 [[TMP9]], i64* [[B]], align 8 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] -; CHECK: pred.store.continue2: -; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP9]], [[PRED_STORE_IF1]] ] +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] +; CHECK: pred.load.continue2: +; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP9]], [[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 -; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] -; CHECK: pred.store.if3: +; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK: pred.load.if3: ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 -; CHECK-NEXT: store i64 [[TMP14]], i64* [[B]], align 8 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP15:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP14]], [[PRED_STORE_IF3]] ] +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] +; CHECK: pred.load.continue4: +; CHECK-NEXT: [[TMP15:%.*]] = phi i64 [ poison, [[PRED_LOAD_CONTINUE2]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 -; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.if5: +; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] +; CHECK: 
pred.load.if5: ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP18]], align 8 -; CHECK-NEXT: store i64 [[TMP19]], i64* [[B]], align 8 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.continue6: -; CHECK-NEXT: [[TMP20:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP19]], [[PRED_STORE_IF5]] ] +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] +; CHECK: pred.load.continue6: +; CHECK-NEXT: [[TMP20:%.*]] = phi i64 [ poison, [[PRED_LOAD_CONTINUE4]] ], [ [[TMP19]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: store i64 [[TMP5]], i64* [[B:%.*]], align 8 +; CHECK-NEXT: store i64 [[TMP10]], i64* [[B]], align 8 +; CHECK-NEXT: store i64 [[TMP15]], i64* [[B]], align 8 +; CHECK-NEXT: store i64 [[TMP20]], i64* [[B]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 @@ -285,51 +285,51 @@ ; VF2UF2: vector.ph: ; VF2UF2-NEXT: br label [[VECTOR_BODY:%.*]] ; VF2UF2: vector.body: -; VF2UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ] -; VF2UF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ] +; VF2UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE7:%.*]] ] +; VF2UF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE7]] ] ; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], ; VF2UF2-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], ; VF2UF2-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], ; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 -; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; VF2UF2: 
pred.store.if: +; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; VF2UF2: pred.load.if: ; VF2UF2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP3]] ; VF2UF2-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 -; VF2UF2-NEXT: store i64 [[TMP5]], i64* [[B:%.*]], align 8 -; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE]] -; VF2UF2: pred.store.continue: -; VF2UF2-NEXT: [[TMP6:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_STORE_IF]] ] +; VF2UF2-NEXT: br label [[PRED_LOAD_CONTINUE]] +; VF2UF2: pred.load.continue: +; VF2UF2-NEXT: [[TMP6:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_LOAD_IF]] ] ; VF2UF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 -; VF2UF2-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] -; VF2UF2: pred.store.if2: +; VF2UF2-NEXT: br i1 [[TMP7]], label [[PRED_LOAD_IF2:%.*]], label [[PRED_LOAD_CONTINUE3:%.*]] +; VF2UF2: pred.load.if2: ; VF2UF2-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 ; VF2UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP8]] ; VF2UF2-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP9]], align 8 -; VF2UF2-NEXT: store i64 [[TMP10]], i64* [[B]], align 8 -; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE3]] -; VF2UF2: pred.store.continue3: -; VF2UF2-NEXT: [[TMP11:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP10]], [[PRED_STORE_IF2]] ] +; VF2UF2-NEXT: br label [[PRED_LOAD_CONTINUE3]] +; VF2UF2: pred.load.continue3: +; VF2UF2-NEXT: [[TMP11:%.*]] = phi i64 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF2]] ] ; VF2UF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 -; VF2UF2-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] -; VF2UF2: pred.store.if4: +; VF2UF2-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5:%.*]] 
+; VF2UF2: pred.load.if4: ; VF2UF2-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 2 ; VF2UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP13]] ; VF2UF2-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 -; VF2UF2-NEXT: store i64 [[TMP15]], i64* [[B]], align 8 -; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE5]] -; VF2UF2: pred.store.continue5: -; VF2UF2-NEXT: [[TMP16:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE3]] ], [ [[TMP15]], [[PRED_STORE_IF4]] ] +; VF2UF2-NEXT: br label [[PRED_LOAD_CONTINUE5]] +; VF2UF2: pred.load.continue5: +; VF2UF2-NEXT: [[TMP16:%.*]] = phi i64 [ poison, [[PRED_LOAD_CONTINUE3]] ], [ [[TMP15]], [[PRED_LOAD_IF4]] ] ; VF2UF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 -; VF2UF2-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]] -; VF2UF2: pred.store.if6: +; VF2UF2-NEXT: br i1 [[TMP17]], label [[PRED_LOAD_IF6:%.*]], label [[PRED_LOAD_CONTINUE7]] +; VF2UF2: pred.load.if6: ; VF2UF2-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 3 ; VF2UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP18]] ; VF2UF2-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP19]], align 8 -; VF2UF2-NEXT: store i64 [[TMP20]], i64* [[B]], align 8 -; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE7]] -; VF2UF2: pred.store.continue7: -; VF2UF2-NEXT: [[TMP21:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE5]] ], [ [[TMP20]], [[PRED_STORE_IF6]] ] +; VF2UF2-NEXT: br label [[PRED_LOAD_CONTINUE7]] +; VF2UF2: pred.load.continue7: +; VF2UF2-NEXT: [[TMP21:%.*]] = phi i64 [ poison, [[PRED_LOAD_CONTINUE5]] ], [ [[TMP20]], [[PRED_LOAD_IF6]] ] +; VF2UF2-NEXT: store i64 [[TMP6]], i64* [[B:%.*]], align 8 +; VF2UF2-NEXT: store i64 [[TMP11]], i64* [[B]], align 8 +; VF2UF2-NEXT: store i64 [[TMP16]], i64* [[B]], align 8 +; VF2UF2-NEXT: store i64 [[TMP21]], i64* [[B]], align 8 ; VF2UF2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], ; VF2UF2-NEXT: 
[[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 @@ -356,7 +356,7 @@ ; VF1UF4: vector.ph: ; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF1UF4: vector.body: -; VF1UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] +; VF1UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE12:%.*]] ] ; VF1UF4-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0 ; VF1UF4-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1 ; VF1UF4-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2 @@ -365,42 +365,42 @@ ; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV4]], 13 ; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV5]], 13 ; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 13 -; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; VF1UF4: pred.store.if: +; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; VF1UF4: pred.load.if: ; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 ; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDUCTION]] ; VF1UF4-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 -; VF1UF4-NEXT: store i64 [[TMP5]], i64* [[B:%.*]], align 8 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] -; VF1UF4: pred.store.continue: -; VF1UF4-NEXT: [[TMP6:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_STORE_IF]] ] -; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; VF1UF4: pred.store.if7: +; VF1UF4-NEXT: br label [[PRED_LOAD_CONTINUE]] +; VF1UF4: pred.load.continue: +; VF1UF4-NEXT: [[TMP6:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_LOAD_IF]] ] +; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] +; VF1UF4: pred.load.if7: ; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 ; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 
[[INDUCTION1]] ; VF1UF4-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 -; VF1UF4-NEXT: store i64 [[TMP8]], i64* [[B]], align 8 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE8]] -; VF1UF4: pred.store.continue8: -; VF1UF4-NEXT: [[TMP9:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP8]], [[PRED_STORE_IF7]] ] -; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] -; VF1UF4: pred.store.if9: +; VF1UF4-NEXT: br label [[PRED_LOAD_CONTINUE8]] +; VF1UF4: pred.load.continue8: +; VF1UF4-NEXT: [[TMP9:%.*]] = phi i64 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP8]], [[PRED_LOAD_IF7]] ] +; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] +; VF1UF4: pred.load.if9: ; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2 ; VF1UF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION2]] ; VF1UF4-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 -; VF1UF4-NEXT: store i64 [[TMP11]], i64* [[B]], align 8 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE10]] -; VF1UF4: pred.store.continue10: -; VF1UF4-NEXT: [[TMP12:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE8]] ], [ [[TMP11]], [[PRED_STORE_IF9]] ] -; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] -; VF1UF4: pred.store.if11: +; VF1UF4-NEXT: br label [[PRED_LOAD_CONTINUE10]] +; VF1UF4: pred.load.continue10: +; VF1UF4-NEXT: [[TMP12:%.*]] = phi i64 [ poison, [[PRED_LOAD_CONTINUE8]] ], [ [[TMP11]], [[PRED_LOAD_IF9]] ] +; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12]] +; VF1UF4: pred.load.if11: ; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3 ; VF1UF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION3]] ; VF1UF4-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 -; VF1UF4-NEXT: store i64 [[TMP14]], i64* [[B]], align 8 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE12]] -; VF1UF4: 
pred.store.continue12: -; VF1UF4-NEXT: [[TMP15:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE10]] ], [ [[TMP14]], [[PRED_STORE_IF11]] ] +; VF1UF4-NEXT: br label [[PRED_LOAD_CONTINUE12]] +; VF1UF4: pred.load.continue12: +; VF1UF4-NEXT: [[TMP15:%.*]] = phi i64 [ poison, [[PRED_LOAD_CONTINUE10]] ], [ [[TMP14]], [[PRED_LOAD_IF11]] ] +; VF1UF4-NEXT: store i64 [[TMP6]], i64* [[B:%.*]], align 8 +; VF1UF4-NEXT: store i64 [[TMP9]], i64* [[B]], align 8 +; VF1UF4-NEXT: store i64 [[TMP12]], i64* [[B]], align 8 +; VF1UF4-NEXT: store i64 [[TMP15]], i64* [[B]], align 8 ; VF1UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; VF1UF4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; VF1UF4-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] Index: llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -24,31 +24,14 @@ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[INC]] -; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP2]], 1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> 
zeroinitializer -; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], -; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; CHECK: pred.store.if: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: store i32 0, i32* [[PTR:%.*]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] -; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.if3: ; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.continue4: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: