diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4209,10 +4209,11 @@ auto *I = dyn_cast(Worklist.pop_back_val()); // We can't sink an instruction if it is a phi node, is not in the loop, - // or may have side effects. + // may have side effects or may read from memory. + // TODO Could do more granular checking to allow sinking a load past non-store instructions. if (!I || isa(I) || !VectorLoop->contains(I) || - I->mayHaveSideEffects()) - continue; + I->mayHaveSideEffects() || I->mayReadFromMemory()) + continue; // If the instruction is already in PredBB, check if we can sink its // operands. In that case, VPlan's sinkScalarOperands() succeeded in diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll --- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll @@ -465,34 +465,34 @@ ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP1]] ; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23 ; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !20, !noalias !23 -; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP4]], 23 -; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = add nsw i32 [[TMP5]], 23 -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP4]], 100 -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP5]], 100 -; UNROLL-NO-VF-NEXT: br i1 [[TMP8]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]] +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] +; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]] +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, 
!alias.scope !23 +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !23 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP4]], 23 +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP5]], 23 +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP4]], 100 +; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = icmp slt i32 [[TMP5]], 100 +; UNROLL-NO-VF-NEXT: br i1 [[TMP12]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]] ; UNROLL-NO-VF: pred.sdiv.if: -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope !23 -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sdiv i32 [[TMP6]], [[TMP4]] -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = sdiv i32 [[TMP11]], [[TMP12]] +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = sdiv i32 [[TMP10]], [[TMP4]] +; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = sdiv i32 [[TMP8]], [[TMP14]] ; UNROLL-NO-VF-NEXT: br label [[PRED_SDIV_CONTINUE]] ; UNROLL-NO-VF: pred.sdiv.continue: -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP12]], [[PRED_SDIV_IF]] ] -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP13]], [[PRED_SDIV_IF]] ] -; UNROLL-NO-VF-NEXT: br i1 [[TMP9]], label [[PRED_SDIV_IF2:%.*]], label [[PRED_SDIV_CONTINUE3]] +; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP14]], [[PRED_SDIV_IF]] ] +; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP15]], [[PRED_SDIV_IF]] ] +; UNROLL-NO-VF-NEXT: br i1 [[TMP13]], label [[PRED_SDIV_IF2:%.*]], label [[PRED_SDIV_CONTINUE3]] ; UNROLL-NO-VF: pred.sdiv.if2: -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !alias.scope !23 -; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = sdiv i32 [[TMP7]], [[TMP5]] -; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = sdiv i32 [[TMP17]], [[TMP18]] +; 
UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = sdiv i32 [[TMP11]], [[TMP5]] +; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = sdiv i32 [[TMP9]], [[TMP18]] ; UNROLL-NO-VF-NEXT: br label [[PRED_SDIV_CONTINUE3]] ; UNROLL-NO-VF: pred.sdiv.continue3: ; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP18]], [[PRED_SDIV_IF2]] ] ; UNROLL-NO-VF-NEXT: [[TMP21:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP19]], [[PRED_SDIV_IF2]] ] -; UNROLL-NO-VF-NEXT: [[TMP22:%.*]] = xor i1 [[TMP8]], true -; UNROLL-NO-VF-NEXT: [[TMP23:%.*]] = xor i1 [[TMP9]], true -; UNROLL-NO-VF-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP22]], i32 [[TMP6]], i32 [[TMP15]] -; UNROLL-NO-VF-NEXT: [[PREDPHI4:%.*]] = select i1 [[TMP23]], i32 [[TMP7]], i32 [[TMP21]] +; UNROLL-NO-VF-NEXT: [[TMP22:%.*]] = xor i1 [[TMP12]], true +; UNROLL-NO-VF-NEXT: [[TMP23:%.*]] = xor i1 [[TMP13]], true +; UNROLL-NO-VF-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP22]], i32 [[TMP10]], i32 [[TMP17]] +; UNROLL-NO-VF-NEXT: [[PREDPHI4:%.*]] = select i1 [[TMP23]], i32 [[TMP11]], i32 [[TMP21]] ; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23 ; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope !20, !noalias !23 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -664,44 +664,44 @@ ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP1]] ; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32 ; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !29, !noalias !32 -; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP4]], 23 -; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = add nsw i32 [[TMP5]], 23 -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP4]], 100 -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP5]], 100 -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = icmp sge i32 [[TMP4]], 200 -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = icmp sge i32 
[[TMP5]], 200 -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = xor i1 [[TMP8]], true, !dbg [[DBG34:![0-9]+]] -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = xor i1 [[TMP9]], true, !dbg [[DBG34]] -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = select i1 [[TMP12]], i1 [[TMP10]], i1 false, !dbg [[DBG35:![0-9]+]] -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = select i1 [[TMP13]], i1 [[TMP11]], i1 false, !dbg [[DBG35]] -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP8]] -; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP9]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]] +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] +; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]] +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !32 +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !32 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP4]], 23 +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP5]], 23 +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP4]], 100 +; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = icmp slt i32 [[TMP5]], 100 +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp sge i32 [[TMP4]], 200 +; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = icmp sge i32 [[TMP5]], 200 +; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true, !dbg [[DBG34:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true, !dbg [[DBG34]] +; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = select i1 [[TMP16]], i1 [[TMP14]], i1 false, !dbg [[DBG35:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = select i1 [[TMP17]], i1 [[TMP15]], i1 false, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP12]] +; UNROLL-NO-VF-NEXT: [[TMP21:%.*]] = or i1 [[TMP19]], [[TMP13]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP20]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]] ; UNROLL-NO-VF: pred.sdiv.if: -; UNROLL-NO-VF-NEXT: 
[[TMP18:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] -; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !alias.scope !32 -; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = sdiv i32 [[TMP6]], [[TMP4]] -; UNROLL-NO-VF-NEXT: [[TMP21:%.*]] = sdiv i32 [[TMP19]], [[TMP20]] +; UNROLL-NO-VF-NEXT: [[TMP22:%.*]] = sdiv i32 [[TMP10]], [[TMP4]] +; UNROLL-NO-VF-NEXT: [[TMP23:%.*]] = sdiv i32 [[TMP8]], [[TMP22]] ; UNROLL-NO-VF-NEXT: br label [[PRED_SDIV_CONTINUE]] ; UNROLL-NO-VF: pred.sdiv.continue: -; UNROLL-NO-VF-NEXT: [[TMP22:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP20]], [[PRED_SDIV_IF]] ] -; UNROLL-NO-VF-NEXT: [[TMP23:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP21]], [[PRED_SDIV_IF]] ] -; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[PRED_SDIV_IF2:%.*]], label [[PRED_SDIV_CONTINUE3]] +; UNROLL-NO-VF-NEXT: [[TMP24:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP22]], [[PRED_SDIV_IF]] ] +; UNROLL-NO-VF-NEXT: [[TMP25:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP23]], [[PRED_SDIV_IF]] ] +; UNROLL-NO-VF-NEXT: br i1 [[TMP21]], label [[PRED_SDIV_IF2:%.*]], label [[PRED_SDIV_CONTINUE3]] ; UNROLL-NO-VF: pred.sdiv.if2: -; UNROLL-NO-VF-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4, !alias.scope !32 -; UNROLL-NO-VF-NEXT: [[TMP26:%.*]] = sdiv i32 [[TMP7]], [[TMP5]] -; UNROLL-NO-VF-NEXT: [[TMP27:%.*]] = sdiv i32 [[TMP25]], [[TMP26]] +; UNROLL-NO-VF-NEXT: [[TMP26:%.*]] = sdiv i32 [[TMP11]], [[TMP5]] +; UNROLL-NO-VF-NEXT: [[TMP27:%.*]] = sdiv i32 [[TMP9]], [[TMP26]] ; UNROLL-NO-VF-NEXT: br label [[PRED_SDIV_CONTINUE3]] ; UNROLL-NO-VF: pred.sdiv.continue3: ; UNROLL-NO-VF-NEXT: [[TMP28:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP26]], [[PRED_SDIV_IF2]] ] ; UNROLL-NO-VF-NEXT: [[TMP29:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP27]], [[PRED_SDIV_IF2]] ] -; UNROLL-NO-VF-NEXT: [[TMP30:%.*]] = xor i1 [[TMP10]], 
true, !dbg [[DBG35]] -; UNROLL-NO-VF-NEXT: [[TMP31:%.*]] = xor i1 [[TMP11]], true, !dbg [[DBG35]] -; UNROLL-NO-VF-NEXT: [[TMP32:%.*]] = select i1 [[TMP12]], i1 [[TMP30]], i1 false, !dbg [[DBG35]] -; UNROLL-NO-VF-NEXT: [[TMP33:%.*]] = select i1 [[TMP13]], i1 [[TMP31]], i1 false, !dbg [[DBG35]] -; UNROLL-NO-VF-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP32]], i32 [[TMP6]], i32 [[TMP23]] -; UNROLL-NO-VF-NEXT: [[PREDPHI4:%.*]] = select i1 [[TMP33]], i32 [[TMP7]], i32 [[TMP29]] +; UNROLL-NO-VF-NEXT: [[TMP30:%.*]] = xor i1 [[TMP14]], true, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP31:%.*]] = xor i1 [[TMP15]], true, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP32:%.*]] = select i1 [[TMP16]], i1 [[TMP30]], i1 false, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[TMP33:%.*]] = select i1 [[TMP17]], i1 [[TMP31]], i1 false, !dbg [[DBG35]] +; UNROLL-NO-VF-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP32]], i32 [[TMP10]], i32 [[TMP25]] +; UNROLL-NO-VF-NEXT: [[PREDPHI4:%.*]] = select i1 [[TMP33]], i32 [[TMP11]], i32 [[TMP29]] ; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32 ; UNROLL-NO-VF-NEXT: store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope !29, !noalias !32 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -51,7 +51,7 @@ ; UNROLL: for.inc: ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128 -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret i32 0 ; @@ -104,7 +104,7 @@ ; UNROLL-NOSIMPLIFY: for.inc: ; 
UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; UNROLL-NOSIMPLIFY: for.end: ; UNROLL-NOSIMPLIFY-NEXT: ret i32 0 ; @@ -121,20 +121,20 @@ ; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; VEC-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VEC: pred.store.if: -; VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP0]] -; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 -; VEC-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP5]], 20 -; VEC-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4 +; VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP0]] +; VEC-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 +; VEC-NEXT: [[TMP7:%.*]] = add nsw i32 [[TMP6]], 20 +; VEC-NEXT: store i32 [[TMP7]], ptr [[TMP5]], align 4 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE]] ; VEC: pred.store.continue: ; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 ; VEC-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.if1: ; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 -; VEC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP9]] -; VEC-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 -; VEC-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], 20 -; VEC-NEXT: store i32 [[TMP11]], ptr [[TMP12]], align 4 +; VEC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP9]] +; VEC-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 +; VEC-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP11]], 20 +; VEC-NEXT: store i32 [[TMP12]], 
ptr [[TMP10]], align 4 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.continue2: ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -156,7 +156,7 @@ ; VEC: for.inc: ; VEC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; VEC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128 -; VEC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; VEC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VEC: for.end: ; VEC-NEXT: ret i32 0 ; @@ -213,16 +213,16 @@ ; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[V_2:%.*]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE3]] ] ; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI4:%.*]], [[PRED_STORE_CONTINUE3]] ] ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[V_1]], [[INDEX]] +; UNROLL-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 +; UNROLL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 1 +; UNROLL-NEXT: [[TMP7:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP5]] +; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP6]] +; UNROLL-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 +; UNROLL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 ; UNROLL-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]] ; UNROLL: pred.store.if: -; UNROLL-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 -; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP5]] -; UNROLL-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; UNROLL-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 -; UNROLL-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 1 -; UNROLL-NEXT: [[TMP9:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP8]] -; UNROLL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -; UNROLL-NEXT: store i32 [[TMP10]], ptr 
[[TMP9]], align 4 +; UNROLL-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 +; UNROLL-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 ; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE3]] ; UNROLL: pred.store.continue3: ; UNROLL-NEXT: [[TMP11:%.*]] = add i32 [[VEC_PHI]], 1 @@ -233,7 +233,7 @@ ; UNROLL-NEXT: [[PREDPHI4]] = select i1 [[TMP14]], i32 [[VEC_PHI1]], i32 [[TMP12]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI4]], [[PREDPHI]] ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] @@ -285,20 +285,20 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[V_2:%.*]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE3]] ] ; UNROLL-NOSIMPLIFY-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI4:%.*]], [[PRED_STORE_CONTINUE3]] ] ; UNROLL-NOSIMPLIFY-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[V_1]], [[INDEX]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP4]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP5]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; UNROLL-NOSIMPLIFY: pred.store.if: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP4]] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP8]], ptr [[TMP6]], align 4 ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]] ; UNROLL-NOSIMPLIFY: pred.store.continue: ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND_2]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] ; UNROLL-NOSIMPLIFY: pred.store.if2: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP7]] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE3]] ; UNROLL-NOSIMPLIFY: pred.store.continue3: ; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = add i32 [[VEC_PHI]], 1 @@ -309,7 +309,7 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI4]] = select i1 [[TMP13]], i32 [[VEC_PHI1]], i32 [[TMP11]] ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL-NOSIMPLIFY: middle.block: ; UNROLL-NOSIMPLIFY-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI4]], [[PREDPHI]] ; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] @@ -333,7 +333,7 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32 ; UNROLL-NOSIMPLIFY-NEXT: [[CMP13:%.*]] = 
icmp slt i32 [[TMP1]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP13]], label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP13]], label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; UNROLL-NOSIMPLIFY: for.inc26.loopexit: ; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_2_LCSSA:%.*]] = phi i32 [ [[INEWCHUNKS_2]], [[FOR_INC23]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC26]] @@ -459,25 +459,25 @@ ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] +; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; UNROLL-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]] +; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr undef, i64 [[TMP1]] +; UNROLL-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP2]], align 1 +; UNROLL-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1 ; UNROLL-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]] ; UNROLL: pred.store.if: -; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]] -; UNROLL-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -; UNROLL-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32 -; UNROLL-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8 -; UNROLL-NEXT: store i8 [[TMP4]], ptr [[TMP1]], align 1 -; UNROLL-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr undef, i64 [[TMP5]] -; UNROLL-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1 -; UNROLL-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 +; UNROLL-NEXT: [[TMP6:%.*]] = zext i8 [[TMP4]] to i32 +; UNROLL-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8 +; UNROLL-NEXT: store i8 [[TMP7]], ptr [[TMP2]], align 1 +; UNROLL-NEXT: [[TMP8:%.*]] = zext i8 
[[TMP5]] to i32 ; UNROLL-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 -; UNROLL-NEXT: store i8 [[TMP9]], ptr [[TMP6]], align 1 +; UNROLL-NEXT: store i8 [[TMP9]], ptr [[TMP3]], align 1 ; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE3]] ; UNROLL: pred.store.continue3: ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; UNROLL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] @@ -496,7 +496,7 @@ ; UNROLL-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 ; UNROLL-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 ; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -507,29 +507,29 @@ ; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NOSIMPLIFY: vector.body: ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr undef, i64 [[TMP1]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP2]], align 1 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; UNROLL-NOSIMPLIFY: 
pred.store.if: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8 -; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP4]], ptr [[TMP1]], align 1 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = zext i8 [[TMP4]] to i32 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8 +; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP7]], ptr [[TMP2]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]] ; UNROLL-NOSIMPLIFY: pred.store.continue: ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] ; UNROLL-NOSIMPLIFY: pred.store.if2: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr undef, i64 [[TMP5]] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = zext i8 [[TMP5]] to i32 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 -; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP9]], ptr [[TMP6]], align 1 +; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP9]], ptr [[TMP3]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE3]] ; UNROLL-NOSIMPLIFY: pred.store.continue3: ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; UNROLL-NOSIMPLIFY: middle.block: ; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] 
= icmp eq i64 undef, undef ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -552,7 +552,7 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; UNROLL-NOSIMPLIFY: for.end: ; UNROLL-NOSIMPLIFY-NEXT: ret void ; @@ -572,9 +572,9 @@ ; VEC: pred.store.if: ; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0 ; VEC-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32 -; VEC-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]] -; VEC-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 -; VEC-NEXT: store i8 [[TMP6]], ptr [[TMP7]], align 1 +; VEC-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]] +; VEC-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP5]] to i8 +; VEC-NEXT: store i8 [[TMP7]], ptr [[TMP6]], align 1 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE]] ; VEC: pred.store.continue: ; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1 @@ -583,9 +583,9 @@ ; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP10:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1 ; VEC-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32 -; VEC-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr undef, i64 [[TMP9]] -; VEC-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8 -; VEC-NEXT: store i8 [[TMP12]], ptr [[TMP13]], align 1 +; VEC-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr undef, i64 [[TMP9]] +; VEC-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP11]] to i8 +; VEC-NEXT: store i8 [[TMP13]], ptr [[TMP12]], align 1 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE3]] ; VEC: pred.store.continue3: ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -659,7 +659,7 @@ ; 
UNROLL-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 ; UNROLL-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 ; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -674,27 +674,27 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP0]] ; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP1]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP2]], align 1 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP3]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: store i8 0, ptr [[TMP2]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: store i8 0, ptr [[TMP3]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; UNROLL-NOSIMPLIFY: pred.store.if: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP2]], align 1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 -; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP6]], ptr [[TMP2]], align 1 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = zext i8 [[TMP4]] to i32 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8 +; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP7]], ptr [[TMP2]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]] ; UNROLL-NOSIMPLIFY: pred.store.continue: ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] ; UNROLL-NOSIMPLIFY: pred.store.if2: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP3]], align 1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = zext i8 [[TMP5]] to i32 ; 
UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 ; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP9]], ptr [[TMP3]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE3]] ; UNROLL-NOSIMPLIFY: pred.store.continue3: ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NOSIMPLIFY: middle.block: ; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -718,7 +718,7 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; UNROLL-NOSIMPLIFY: for.end: ; UNROLL-NOSIMPLIFY-NEXT: ret void ;