diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6541,15 +6541,14 @@ unsigned AS = getLoadStoreAddressSpace(I); Value *Ptr = getLoadStorePointerOperand(I); - Type *PtrTy = ToVectorTy(Ptr->getType(), VF); // Figure out whether the access is strided and get the stride value // if it's known in compile time const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop); // Get the cost of the scalar memory instruction and address computation. - unsigned Cost = - VF.getKnownMinValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV); + unsigned Cost = VF.getKnownMinValue() * + TTI.getAddressComputationCost(Ptr->getType(), SE, PtrSCEV); // Don't pass *I here, since it is scalar but will actually be part of a // vectorized loop where the user of it is a vectorized instruction. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll @@ -1,21 +1,22 @@ ; RUN: opt -loop-vectorize -mtriple=arm64-apple-ios -S -mcpu=cyclone -enable-interleaved-mem-accesses=false < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" -@kernel = global [512 x float] zeroinitializer, align 16 -@kernel2 = global [512 x float] zeroinitializer, align 16 -@kernel3 = global [512 x float] zeroinitializer, align 16 -@kernel4 = global [512 x float] zeroinitializer, align 16 -@src_data = global [1536 x float] zeroinitializer, align 16 -@r_ = global i8 0, align 1 -@g_ = global i8 0, align 1 -@b_ = global i8 0, align 1 +@kernel = external global [512 x float], align 16 +@kernel2 = external global [512 x float], align 16 
+@kernel3 = external global [512 x float], align 16 +@kernel4 = external global [512 x float], align 16 +@src_data = external global [1536 x float], align 16 +@r_ = external global i8, align 1 +@g_ = external global i8, align 1 +@b_ = external global i8, align 1 -; We don't want to vectorize most loops containing gathers because they are -; expensive. -; Make sure we don't vectorize it. -; CHECK-NOT: x float> +; For @test1, vectorizing with scalarizing the gathers is +; beneficial overall. + +; CHECK-LABEL: @test1 +; CHECK: <4 x float> -define void @_Z4testmm(i64 %size, i64 %offset) { +define void @test1(i64 %size, i64 %offset) { entry: %cmp53 = icmp eq i64 %size, 0 br i1 %cmp53, label %for.end, label %for.body.lr.ph @@ -83,3 +84,42 @@ store i8 %b.0.lcssa, i8* @b_, align 1 ret void } + +; Vectorizing @test2 is not beneficial, because of the scalarization +; overhead for the gathers. +define void @test2(i64 %size, i64 %offset) { +; CHECK-LABEL: define void @test2( +; CHECK-NOT: x float> +; CHECK: ret void +entry: + %cmp53 = icmp eq i64 %size, 0 + br i1 %cmp53, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: + br label %for.body + +for.body: + %v.055 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %mul13, %for.body ] + %add = add i64 %v.055, %offset + %mul = mul i64 %add, 3 + %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %mul + %l.0 = load float, float* %arrayidx, align 4 + %arrayidx.sum = add i64 %mul, 1 + %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum + %l.1 = load float, float* %arrayidx11, align 4 + %mul13 = fmul fast float %l.0, %l.1 + %inc = add i64 %v.055, 1 + %exitcond = icmp ne i64 %inc, %size + br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: + %add30.lcssa = phi float [ %mul13, %for.body ] + %phitmp61 = fptoui float %add30.lcssa to i8 
+ br label %for.end + +for.end: + %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ] + store i8 %b.0.lcssa, i8* @b_, align 1 + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll @@ -11,14 +11,13 @@ @g_ = global i8 0, align 4 @b_ = global i8 0, align 4 -; We don't want to vectorize most loops containing gathers because they are -; expensive. This function represents a point where vectorization starts to -; become beneficial. -; Make sure we are conservative and don't vectorize it. -; CHECK-NOT: <2 x float> -; CHECK-NOT: <4 x float> +; For @test1, vectorizing with scalarizing the gathers is +; beneficial overall. -define void @_Z4testmm(i32 %size, i32 %offset) { +; CHECK-LABEL: @test1 +; CHECK: <4 x float> + +define void @test1(i32 %size, i32 %offset) { entry: %cmp53 = icmp eq i32 %size, 0 br i1 %cmp53, label %for.end, label %for.body.lr.ph @@ -86,3 +85,42 @@ store i8 %b.0.lcssa, i8* @b_, align 4 ret void } + +; Vectorizing @test2 is not beneficial, because of the scalarization +; overhead for the gathers. 
+define void @test2(i64 %size, i64 %offset) { +; CHECK-LABEL: define void @test2( +; CHECK-NOT: x float> +; CHECK: ret void +entry: + %cmp53 = icmp eq i64 %size, 0 + br i1 %cmp53, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: + br label %for.body + +for.body: + %v.055 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %mul13, %for.body ] + %add = add i64 %v.055, %offset + %mul = mul i64 %add, 3 + %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %mul + %l.0 = load float, float* %arrayidx, align 4 + %arrayidx.sum = add i64 %mul, 1 + %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum + %l.1 = load float, float* %arrayidx11, align 4 + %mul13 = fmul fast float %l.0, %l.1 + %inc = add i64 %v.055, 1 + %exitcond = icmp ne i64 %inc, %size + br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: + %add30.lcssa = phi float [ %mul13, %for.body ] + %phitmp61 = fptoui float %add30.lcssa to i8 + br label %for.end + +for.end: + %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ] + store i8 %b.0.lcssa, i8* @b_, align 1 + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll --- a/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll @@ -10,13 +10,13 @@ @g_ = global i8 0, align 1 @b_ = global i8 0, align 1 -; We don't want to vectorize most loops containing gathers because they are -; expensive. This function represents a point where vectorization starts to -; become beneficial. -; Make sure we are conservative and don't vectorize it. -; CHECK-NOT: x float> +; For @test1, vectorizing with scalarizing the gathers is +; beneficial overall. 
+ +; CHECK-LABEL: @test1 +; CHECK: <4 x float> -define void @_Z4testmm(i64 %size, i64 %offset) { +define void @test1(i64 %size, i64 %offset) { entry: %cmp53 = icmp eq i64 %size, 0 br i1 %cmp53, label %for.end, label %for.body.lr.ph @@ -84,3 +84,42 @@ store i8 %b.0.lcssa, i8* @b_, align 1 ret void } + +; Vectorizing this function is not beneficial, because of the scalarization +; overhead for the gathers. +define void @test2(i64 %size, i64 %offset) { +; CHECK-LABEL: define void @test2( +; CHECK-NOT: x float> +; CHECK: ret void +entry: + %cmp53 = icmp eq i64 %size, 0 + br i1 %cmp53, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: + br label %for.body + +for.body: + %v.055 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %mul13, %for.body ] + %add = add i64 %v.055, %offset + %mul = mul i64 %add, 3 + %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %mul + %l.0 = load float, float* %arrayidx, align 4 + %arrayidx.sum = add i64 %mul, 1 + %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum + %l.1 = load float, float* %arrayidx11, align 4 + %mul13 = fmul fast float %l.0, %l.1 + %inc = add i64 %v.055, 1 + %exitcond = icmp ne i64 %inc, %size + br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: + %add30.lcssa = phi float [ %mul13, %for.body ] + %phitmp61 = fptoui float %add30.lcssa to i8 + br label %for.end + +for.end: + %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ] + store i8 %b.0.lcssa, i8* @b_, align 1 + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ 
-108,7 +108,7 @@ ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* @@ -122,65 +122,497 @@ ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[MIN_ITERS_CHECK5:%.*]] = icmp ult i64 [[SMAX]], 16 +; CHECK-NEXT: [[MIN_ITERS_CHECK5:%.*]] = icmp ult i64 [[SMAX]], 64 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK5]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775744 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[K:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> undef, i32 [[NTRUNC]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT6]], <16 x i32> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <16 x i32*> undef, i32* [[A]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT8]], <16 x i32*> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <16 x i32> undef, i32 [[K]], i32 0 +; CHECK-NEXT: 
[[BROADCAST_SPLAT10:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT9]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <16 x i32> undef, i32 [[K]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT11]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <16 x i32> undef, i32 [[K]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT13]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <16 x i32> undef, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT15]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <16 x i32> undef, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT17]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <16 x i32> undef, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT19]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <16 x i32> undef, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT21]], <16 x i32> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE148:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* 
[[TMP2]] to <16 x i32>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP3]], align 8, !alias.scope !8, !noalias !11 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP2]] to <16 x i32>* -; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT7]], <16 x i32>* [[TMP5]], align 4, !alias.scope !8, !noalias !11 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[BROADCAST_SPLAT7]], <16 x i32*> [[BROADCAST_SPLAT9]], i32 4, <16 x i1> [[TMP4]]), !alias.scope !11 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP13:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 16 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <16 x i32>* +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, <16 x i32>* [[TMP5]], align 8, !alias.scope !8, !noalias !11 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 32 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <16 x i32>* +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, <16 x i32>* [[TMP7]], align 8, !alias.scope !8, !noalias !11 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 48 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <16 x i32>* +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i32>, <16 x i32>* [[TMP9]], align 8, !alias.scope !8, !noalias !11 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD6]], [[BROADCAST_SPLAT10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD7]], [[BROADCAST_SPLAT12]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD8]], [[BROADCAST_SPLAT14]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP2]] to <16 x 
i32>* +; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT16]], <16 x i32>* [[TMP14]], align 4, !alias.scope !8, !noalias !11 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP4]] to <16 x i32>* +; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT18]], <16 x i32>* [[TMP15]], align 4, !alias.scope !8, !noalias !11 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP6]] to <16 x i32>* +; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT20]], <16 x i32>* [[TMP16]], align 4, !alias.scope !8, !noalias !11 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP8]] to <16 x i32>* +; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT22]], <16 x i32>* [[TMP17]], align 4, !alias.scope !8, !noalias !11 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP10]], i32 1 +; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]] +; CHECK: pred.store.if23: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]] +; CHECK: pred.store.continue24: +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP10]], i32 2 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]] +; CHECK: pred.store.if25: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]] +; CHECK: pred.store.continue26: +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP10]], i32 3 +; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]] +; CHECK: pred.store.if27: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, 
!alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]] +; CHECK: pred.store.continue28: +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP10]], i32 4 +; CHECK-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]] +; CHECK: pred.store.if29: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]] +; CHECK: pred.store.continue30: +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP10]], i32 5 +; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] +; CHECK: pred.store.if31: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]] +; CHECK: pred.store.continue32: +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP10]], i32 6 +; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] +; CHECK: pred.store.if33: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE34]] +; CHECK: pred.store.continue34: +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP10]], i32 7 +; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] +; CHECK: pred.store.if35: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE36]] +; CHECK: pred.store.continue36: +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i1> [[TMP10]], i32 8 +; CHECK-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] +; CHECK: pred.store.if37: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE38]] +; CHECK: pred.store.continue38: +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP10]], i32 9 +; CHECK-NEXT: br i1 [[TMP27]], label 
[[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] +; CHECK: pred.store.if39: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE40]] +; CHECK: pred.store.continue40: +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP10]], i32 10 +; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] +; CHECK: pred.store.if41: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE42]] +; CHECK: pred.store.continue42: +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP10]], i32 11 +; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] +; CHECK: pred.store.if43: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE44]] +; CHECK: pred.store.continue44: +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i1> [[TMP10]], i32 12 +; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] +; CHECK: pred.store.if45: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE46]] +; CHECK: pred.store.continue46: +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP10]], i32 13 +; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] +; CHECK: pred.store.if47: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE48]] +; CHECK: pred.store.continue48: +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP10]], i32 14 +; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] +; CHECK: pred.store.if49: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label 
[[PRED_STORE_CONTINUE50]] +; CHECK: pred.store.continue50: +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP10]], i32 15 +; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] +; CHECK: pred.store.if51: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE52]] +; CHECK: pred.store.continue52: +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP11]], i32 0 +; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] +; CHECK: pred.store.if53: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE54]] +; CHECK: pred.store.continue54: +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP11]], i32 1 +; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] +; CHECK: pred.store.if55: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE56]] +; CHECK: pred.store.continue56: +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP11]], i32 2 +; CHECK-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] +; CHECK: pred.store.if57: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE58]] +; CHECK: pred.store.continue58: +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[TMP11]], i32 3 +; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]] +; CHECK: pred.store.if59: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE60]] +; CHECK: pred.store.continue60: +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i1> [[TMP11]], i32 4 +; CHECK-NEXT: br i1 [[TMP38]], label [[PRED_STORE_IF61:%.*]], label 
[[PRED_STORE_CONTINUE62:%.*]] +; CHECK: pred.store.if61: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE62]] +; CHECK: pred.store.continue62: +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP11]], i32 5 +; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF63:%.*]], label [[PRED_STORE_CONTINUE64:%.*]] +; CHECK: pred.store.if63: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE64]] +; CHECK: pred.store.continue64: +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP11]], i32 6 +; CHECK-NEXT: br i1 [[TMP40]], label [[PRED_STORE_IF65:%.*]], label [[PRED_STORE_CONTINUE66:%.*]] +; CHECK: pred.store.if65: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE66]] +; CHECK: pred.store.continue66: +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i1> [[TMP11]], i32 7 +; CHECK-NEXT: br i1 [[TMP41]], label [[PRED_STORE_IF67:%.*]], label [[PRED_STORE_CONTINUE68:%.*]] +; CHECK: pred.store.if67: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE68]] +; CHECK: pred.store.continue68: +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i1> [[TMP11]], i32 8 +; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_STORE_IF69:%.*]], label [[PRED_STORE_CONTINUE70:%.*]] +; CHECK: pred.store.if69: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE70]] +; CHECK: pred.store.continue70: +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i1> [[TMP11]], i32 9 +; CHECK-NEXT: br i1 [[TMP43]], label [[PRED_STORE_IF71:%.*]], label [[PRED_STORE_CONTINUE72:%.*]] +; CHECK: pred.store.if71: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE72]] +; CHECK: pred.store.continue72: 
+; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i1> [[TMP11]], i32 10 +; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_STORE_IF73:%.*]], label [[PRED_STORE_CONTINUE74:%.*]] +; CHECK: pred.store.if73: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE74]] +; CHECK: pred.store.continue74: +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <16 x i1> [[TMP11]], i32 11 +; CHECK-NEXT: br i1 [[TMP45]], label [[PRED_STORE_IF75:%.*]], label [[PRED_STORE_CONTINUE76:%.*]] +; CHECK: pred.store.if75: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE76]] +; CHECK: pred.store.continue76: +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i1> [[TMP11]], i32 12 +; CHECK-NEXT: br i1 [[TMP46]], label [[PRED_STORE_IF77:%.*]], label [[PRED_STORE_CONTINUE78:%.*]] +; CHECK: pred.store.if77: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE78]] +; CHECK: pred.store.continue78: +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[TMP11]], i32 13 +; CHECK-NEXT: br i1 [[TMP47]], label [[PRED_STORE_IF79:%.*]], label [[PRED_STORE_CONTINUE80:%.*]] +; CHECK: pred.store.if79: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE80]] +; CHECK: pred.store.continue80: +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i1> [[TMP11]], i32 14 +; CHECK-NEXT: br i1 [[TMP48]], label [[PRED_STORE_IF81:%.*]], label [[PRED_STORE_CONTINUE82:%.*]] +; CHECK: pred.store.if81: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE82]] +; CHECK: pred.store.continue82: +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <16 x i1> [[TMP11]], i32 15 +; CHECK-NEXT: br i1 [[TMP49]], label [[PRED_STORE_IF83:%.*]], label [[PRED_STORE_CONTINUE84:%.*]] +; CHECK: pred.store.if83: +; 
CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE84]] +; CHECK: pred.store.continue84: +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i1> [[TMP12]], i32 0 +; CHECK-NEXT: br i1 [[TMP50]], label [[PRED_STORE_IF85:%.*]], label [[PRED_STORE_CONTINUE86:%.*]] +; CHECK: pred.store.if85: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE86]] +; CHECK: pred.store.continue86: +; CHECK-NEXT: [[TMP51:%.*]] = extractelement <16 x i1> [[TMP12]], i32 1 +; CHECK-NEXT: br i1 [[TMP51]], label [[PRED_STORE_IF87:%.*]], label [[PRED_STORE_CONTINUE88:%.*]] +; CHECK: pred.store.if87: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE88]] +; CHECK: pred.store.continue88: +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[TMP12]], i32 2 +; CHECK-NEXT: br i1 [[TMP52]], label [[PRED_STORE_IF89:%.*]], label [[PRED_STORE_CONTINUE90:%.*]] +; CHECK: pred.store.if89: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE90]] +; CHECK: pred.store.continue90: +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <16 x i1> [[TMP12]], i32 3 +; CHECK-NEXT: br i1 [[TMP53]], label [[PRED_STORE_IF91:%.*]], label [[PRED_STORE_CONTINUE92:%.*]] +; CHECK: pred.store.if91: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE92]] +; CHECK: pred.store.continue92: +; CHECK-NEXT: [[TMP54:%.*]] = extractelement <16 x i1> [[TMP12]], i32 4 +; CHECK-NEXT: br i1 [[TMP54]], label [[PRED_STORE_IF93:%.*]], label [[PRED_STORE_CONTINUE94:%.*]] +; CHECK: pred.store.if93: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE94]] +; CHECK: pred.store.continue94: +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <16 x i1> 
[[TMP12]], i32 5 +; CHECK-NEXT: br i1 [[TMP55]], label [[PRED_STORE_IF95:%.*]], label [[PRED_STORE_CONTINUE96:%.*]] +; CHECK: pred.store.if95: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE96]] +; CHECK: pred.store.continue96: +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <16 x i1> [[TMP12]], i32 6 +; CHECK-NEXT: br i1 [[TMP56]], label [[PRED_STORE_IF97:%.*]], label [[PRED_STORE_CONTINUE98:%.*]] +; CHECK: pred.store.if97: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE98]] +; CHECK: pred.store.continue98: +; CHECK-NEXT: [[TMP57:%.*]] = extractelement <16 x i1> [[TMP12]], i32 7 +; CHECK-NEXT: br i1 [[TMP57]], label [[PRED_STORE_IF99:%.*]], label [[PRED_STORE_CONTINUE100:%.*]] +; CHECK: pred.store.if99: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE100]] +; CHECK: pred.store.continue100: +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i1> [[TMP12]], i32 8 +; CHECK-NEXT: br i1 [[TMP58]], label [[PRED_STORE_IF101:%.*]], label [[PRED_STORE_CONTINUE102:%.*]] +; CHECK: pred.store.if101: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE102]] +; CHECK: pred.store.continue102: +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP12]], i32 9 +; CHECK-NEXT: br i1 [[TMP59]], label [[PRED_STORE_IF103:%.*]], label [[PRED_STORE_CONTINUE104:%.*]] +; CHECK: pred.store.if103: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE104]] +; CHECK: pred.store.continue104: +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <16 x i1> [[TMP12]], i32 10 +; CHECK-NEXT: br i1 [[TMP60]], label [[PRED_STORE_IF105:%.*]], label [[PRED_STORE_CONTINUE106:%.*]] +; CHECK: pred.store.if105: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 
4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE106]] +; CHECK: pred.store.continue106: +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <16 x i1> [[TMP12]], i32 11 +; CHECK-NEXT: br i1 [[TMP61]], label [[PRED_STORE_IF107:%.*]], label [[PRED_STORE_CONTINUE108:%.*]] +; CHECK: pred.store.if107: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE108]] +; CHECK: pred.store.continue108: +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i1> [[TMP12]], i32 12 +; CHECK-NEXT: br i1 [[TMP62]], label [[PRED_STORE_IF109:%.*]], label [[PRED_STORE_CONTINUE110:%.*]] +; CHECK: pred.store.if109: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE110]] +; CHECK: pred.store.continue110: +; CHECK-NEXT: [[TMP63:%.*]] = extractelement <16 x i1> [[TMP12]], i32 13 +; CHECK-NEXT: br i1 [[TMP63]], label [[PRED_STORE_IF111:%.*]], label [[PRED_STORE_CONTINUE112:%.*]] +; CHECK: pred.store.if111: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE112]] +; CHECK: pred.store.continue112: +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[TMP12]], i32 14 +; CHECK-NEXT: br i1 [[TMP64]], label [[PRED_STORE_IF113:%.*]], label [[PRED_STORE_CONTINUE114:%.*]] +; CHECK: pred.store.if113: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE114]] +; CHECK: pred.store.continue114: +; CHECK-NEXT: [[TMP65:%.*]] = extractelement <16 x i1> [[TMP12]], i32 15 +; CHECK-NEXT: br i1 [[TMP65]], label [[PRED_STORE_IF115:%.*]], label [[PRED_STORE_CONTINUE116:%.*]] +; CHECK: pred.store.if115: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE116]] +; CHECK: pred.store.continue116: +; CHECK-NEXT: [[TMP66:%.*]] = extractelement <16 x i1> [[TMP13]], i32 0 +; 
CHECK-NEXT: br i1 [[TMP66]], label [[PRED_STORE_IF117:%.*]], label [[PRED_STORE_CONTINUE118:%.*]] +; CHECK: pred.store.if117: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE118]] +; CHECK: pred.store.continue118: +; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP13]], i32 1 +; CHECK-NEXT: br i1 [[TMP67]], label [[PRED_STORE_IF119:%.*]], label [[PRED_STORE_CONTINUE120:%.*]] +; CHECK: pred.store.if119: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE120]] +; CHECK: pred.store.continue120: +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <16 x i1> [[TMP13]], i32 2 +; CHECK-NEXT: br i1 [[TMP68]], label [[PRED_STORE_IF121:%.*]], label [[PRED_STORE_CONTINUE122:%.*]] +; CHECK: pred.store.if121: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE122]] +; CHECK: pred.store.continue122: +; CHECK-NEXT: [[TMP69:%.*]] = extractelement <16 x i1> [[TMP13]], i32 3 +; CHECK-NEXT: br i1 [[TMP69]], label [[PRED_STORE_IF123:%.*]], label [[PRED_STORE_CONTINUE124:%.*]] +; CHECK: pred.store.if123: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE124]] +; CHECK: pred.store.continue124: +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i1> [[TMP13]], i32 4 +; CHECK-NEXT: br i1 [[TMP70]], label [[PRED_STORE_IF125:%.*]], label [[PRED_STORE_CONTINUE126:%.*]] +; CHECK: pred.store.if125: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE126]] +; CHECK: pred.store.continue126: +; CHECK-NEXT: [[TMP71:%.*]] = extractelement <16 x i1> [[TMP13]], i32 5 +; CHECK-NEXT: br i1 [[TMP71]], label [[PRED_STORE_IF127:%.*]], label [[PRED_STORE_CONTINUE128:%.*]] +; CHECK: pred.store.if127: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, 
!alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE128]] +; CHECK: pred.store.continue128: +; CHECK-NEXT: [[TMP72:%.*]] = extractelement <16 x i1> [[TMP13]], i32 6 +; CHECK-NEXT: br i1 [[TMP72]], label [[PRED_STORE_IF129:%.*]], label [[PRED_STORE_CONTINUE130:%.*]] +; CHECK: pred.store.if129: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE130]] +; CHECK: pred.store.continue130: +; CHECK-NEXT: [[TMP73:%.*]] = extractelement <16 x i1> [[TMP13]], i32 7 +; CHECK-NEXT: br i1 [[TMP73]], label [[PRED_STORE_IF131:%.*]], label [[PRED_STORE_CONTINUE132:%.*]] +; CHECK: pred.store.if131: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE132]] +; CHECK: pred.store.continue132: +; CHECK-NEXT: [[TMP74:%.*]] = extractelement <16 x i1> [[TMP13]], i32 8 +; CHECK-NEXT: br i1 [[TMP74]], label [[PRED_STORE_IF133:%.*]], label [[PRED_STORE_CONTINUE134:%.*]] +; CHECK: pred.store.if133: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE134]] +; CHECK: pred.store.continue134: +; CHECK-NEXT: [[TMP75:%.*]] = extractelement <16 x i1> [[TMP13]], i32 9 +; CHECK-NEXT: br i1 [[TMP75]], label [[PRED_STORE_IF135:%.*]], label [[PRED_STORE_CONTINUE136:%.*]] +; CHECK: pred.store.if135: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE136]] +; CHECK: pred.store.continue136: +; CHECK-NEXT: [[TMP76:%.*]] = extractelement <16 x i1> [[TMP13]], i32 10 +; CHECK-NEXT: br i1 [[TMP76]], label [[PRED_STORE_IF137:%.*]], label [[PRED_STORE_CONTINUE138:%.*]] +; CHECK: pred.store.if137: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE138]] +; CHECK: pred.store.continue138: +; CHECK-NEXT: [[TMP77:%.*]] = extractelement <16 x i1> [[TMP13]], i32 11 +; 
CHECK-NEXT: br i1 [[TMP77]], label [[PRED_STORE_IF139:%.*]], label [[PRED_STORE_CONTINUE140:%.*]] +; CHECK: pred.store.if139: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE140]] +; CHECK: pred.store.continue140: +; CHECK-NEXT: [[TMP78:%.*]] = extractelement <16 x i1> [[TMP13]], i32 12 +; CHECK-NEXT: br i1 [[TMP78]], label [[PRED_STORE_IF141:%.*]], label [[PRED_STORE_CONTINUE142:%.*]] +; CHECK: pred.store.if141: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE142]] +; CHECK: pred.store.continue142: +; CHECK-NEXT: [[TMP79:%.*]] = extractelement <16 x i1> [[TMP13]], i32 13 +; CHECK-NEXT: br i1 [[TMP79]], label [[PRED_STORE_IF143:%.*]], label [[PRED_STORE_CONTINUE144:%.*]] +; CHECK: pred.store.if143: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE144]] +; CHECK: pred.store.continue144: +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <16 x i1> [[TMP13]], i32 14 +; CHECK-NEXT: br i1 [[TMP80]], label [[PRED_STORE_IF145:%.*]], label [[PRED_STORE_CONTINUE146:%.*]] +; CHECK: pred.store.if145: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE146]] +; CHECK: pred.store.continue146: +; CHECK-NEXT: [[TMP81:%.*]] = extractelement <16 x i1> [[TMP13]], i32 15 +; CHECK-NEXT: br i1 [[TMP81]], label [[PRED_STORE_IF147:%.*]], label [[PRED_STORE_CONTINUE148]] +; CHECK: pred.store.if147: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !11 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE148]] +; CHECK: pred.store.continue148: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 64 +; CHECK-NEXT: [[TMP82:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP82]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP13:!llvm.loop !.*]] ; CHECK: middle.block: ; 
CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX]], 8 -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT_NOT:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX]], 60 +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: [[SMAX10:%.*]] = select i1 [[TMP7]], i64 [[N]], i64 1 -; CHECK-NEXT: [[N_VEC12:%.*]] = and i64 [[SMAX10]], 9223372036854775800 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <8 x i32> undef, i32 [[K]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT17]], <8 x i32> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <8 x i32> undef, i32 [[NTRUNC]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT19]], <8 x i32> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <8 x i32*> undef, i32* [[A]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT21]], <8 x i32*> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP83:%.*]] = icmp sgt i64 [[N]], 1 +; CHECK-NEXT: [[SMAX149:%.*]] = select i1 [[TMP83]], i64 [[N]], i64 1 +; CHECK-NEXT: [[N_VEC151:%.*]] = and i64 [[SMAX149]], 9223372036854775804 +; CHECK-NEXT: 
[[BROADCAST_SPLATINSERT156:%.*]] = insertelement <4 x i32> undef, i32 [[K]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT157:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT156]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT158:%.*]] = insertelement <4 x i32> undef, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT159:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT158]], <4 x i32> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX13:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX13]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <8 x i32>, <8 x i32>* [[TMP9]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD16]], [[BROADCAST_SPLAT18]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT20]], <8 x i32>* [[TMP11]], align 4 -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[BROADCAST_SPLAT20]], <8 x i32*> [[BROADCAST_SPLAT22]], i32 4, <8 x i1> [[TMP10]]) -; CHECK-NEXT: [[INDEX_NEXT14]] = add i64 [[INDEX13]], 8 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC12]] -; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] +; CHECK-NEXT: [[INDEX152:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT153:%.*]], [[PRED_STORE_CONTINUE167:%.*]] ] +; CHECK-NEXT: [[TMP84:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX152]] +; CHECK-NEXT: [[TMP85:%.*]] = bitcast i32* [[TMP84]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD155:%.*]] = load <4 x i32>, <4 x i32>* [[TMP85]], align 8 +; CHECK-NEXT: 
[[TMP86:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD155]], [[BROADCAST_SPLAT157]] +; CHECK-NEXT: [[TMP87:%.*]] = bitcast i32* [[TMP84]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT159]], <4 x i32>* [[TMP87]], align 4 +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <4 x i1> [[TMP86]], i32 0 +; CHECK-NEXT: br i1 [[TMP88]], label [[PRED_STORE_IF160:%.*]], label [[PRED_STORE_CONTINUE161:%.*]] +; CHECK: pred.store.if160: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE161]] +; CHECK: pred.store.continue161: +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <4 x i1> [[TMP86]], i32 1 +; CHECK-NEXT: br i1 [[TMP89]], label [[PRED_STORE_IF162:%.*]], label [[PRED_STORE_CONTINUE163:%.*]] +; CHECK: pred.store.if162: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE163]] +; CHECK: pred.store.continue163: +; CHECK-NEXT: [[TMP90:%.*]] = extractelement <4 x i1> [[TMP86]], i32 2 +; CHECK-NEXT: br i1 [[TMP90]], label [[PRED_STORE_IF164:%.*]], label [[PRED_STORE_CONTINUE165:%.*]] +; CHECK: pred.store.if164: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE165]] +; CHECK: pred.store.continue165: +; CHECK-NEXT: [[TMP91:%.*]] = extractelement <4 x i1> [[TMP86]], i32 3 +; CHECK-NEXT: br i1 [[TMP91]], label [[PRED_STORE_IF166:%.*]], label [[PRED_STORE_CONTINUE167]] +; CHECK: pred.store.if166: +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE167]] +; CHECK: pred.store.continue167: +; CHECK-NEXT: [[INDEX_NEXT153]] = add i64 [[INDEX152]], 4 +; CHECK-NEXT: [[TMP92:%.*]] = icmp eq i64 [[INDEX_NEXT153]], [[N_VEC151]] +; CHECK-NEXT: br i1 [[TMP92]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[SMAX10]], [[N_VEC12]] -; CHECK-NEXT: br i1 [[CMP_N15]], 
label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N154:%.*]] = icmp eq i64 [[SMAX149]], [[N_VEC151]] +; CHECK-NEXT: br i1 [[CMP_N154]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC151]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] @@ -228,12 +660,12 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b, i32* %c, i32 %k) { ; CHECK-LABEL: @variant_val_store_to_inv_address_conditional( -; CHECK-NEXT: iter.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 ; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1 ; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[C5:%.*]] = bitcast i32* [[C:%.*]] to i8* ; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* @@ -254,18 +686,15 @@ ; CHECK-NEXT: [[BOUND113:%.*]] = icmp ugt i8* [[UGLYGEP]], [[C5]] ; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]] ; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT14]] -; CHECK-NEXT: br i1 [[CONFLICT_RDX15]], 
label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[MIN_ITERS_CHECK16:%.*]] = icmp ult i64 [[SMAX]], 16 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK16]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX15]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[K:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <16 x i32> undef, i32 [[NTRUNC]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT17]], <16 x i32> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <16 x i32*> undef, i32* [[A]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT19]], <16 x i32*> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <16 x i32> undef, i32 [[NTRUNC]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT17:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT16]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <16 x i32*> undef, i32* [[A]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT18]], <16 x i32*> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -274,56 +703,22 @@ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP3]], align 8, !alias.scope !17, !noalias !20 ; CHECK-NEXT: [[TMP4:%.*]] = 
icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP2]] to <16 x i32>* -; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT18]], <16 x i32>* [[TMP5]], align 4, !alias.scope !17, !noalias !20 +; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT17]], <16 x i32>* [[TMP5]], align 4, !alias.scope !17, !noalias !20 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <16 x i32>* ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP7]], i32 8, <16 x i1> [[TMP4]], <16 x i32> undef), !alias.scope !23 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x i32*> [[BROADCAST_SPLAT20]], i32 4, <16 x i1> [[TMP4]]), !alias.scope !24, !noalias !23 +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x i32*> [[BROADCAST_SPLAT19]], i32 4, <16 x i1> [[TMP4]]), !alias.scope !24, !noalias !23 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP25:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX]], 8 -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT_NOT:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] -; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i64 [[N]], 1 -; CHECK-NEXT: 
[[SMAX21:%.*]] = select i1 [[TMP9]], i64 [[N]], i64 1 -; CHECK-NEXT: [[N_VEC23:%.*]] = and i64 [[SMAX21]], 9223372036854775800 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <8 x i32> undef, i32 [[K]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT28]], <8 x i32> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <8 x i32> undef, i32 [[NTRUNC]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT30]], <8 x i32> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT33:%.*]] = insertelement <8 x i32*> undef, i32* [[A]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT34:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT33]], <8 x i32*> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX24:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX24]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_LOAD27:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD27]], [[BROADCAST_SPLAT29]] -; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT31]], <8 x i32>* [[TMP13]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX24]] -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_MASKED_LOAD32:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP15]], i32 8, <8 x i1> [[TMP12]], <8 x i32> undef) -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[WIDE_MASKED_LOAD32]], <8 x i32*> [[BROADCAST_SPLAT34]], i32 
4, <8 x i1> [[TMP12]]) -; CHECK-NEXT: [[INDEX_NEXT25]] = add i64 [[INDEX24]], 8 -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC23]] -; CHECK-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP26:!llvm.loop !.*]] -; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N26:%.*]] = icmp eq i64 [[SMAX21]], [[N_VEC23]] -; CHECK-NEXT: br i1 [[CMP_N26]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] -; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I]] ; CHECK-NEXT: [[T2:%.*]] = load i32, i32* [[T1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[T2]], [[K]] @@ -337,9 +732,7 @@ ; CHECK: latch: ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], [[LOOP27:!llvm.loop !.*]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], [[LOOP26:!llvm.loop !.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll 
b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -127,7 +127,7 @@ ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -153,7 +153,7 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1016 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP6]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP6]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.body: ; ENABLED_MASKED_STRIDED-NEXT: [[IX_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 1016, [[VECTOR_BODY]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_09]], [[CONV]] @@ -168,7 +168,7 @@ ; ENABLED_MASKED_STRIDED: for.inc: ; ENABLED_MASKED_STRIDED-NEXT: [[INC]] = add nuw nsw i32 [[IX_09]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; 
ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -303,7 +303,7 @@ ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !2 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP2:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -330,7 +330,7 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -495,7 +495,7 @@ ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !3 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -532,7 +532,7 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 
[[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -702,7 +702,7 @@ ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !4 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -739,7 +739,7 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -843,7 +843,7 @@ ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP35]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !5 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP35]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -862,7 +862,7 @@ ; ENABLED_MASKED_STRIDED-NEXT: store <8 x i8> 
[[STRIDED_VEC]], <8 x i8>* [[TMP4]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1008,7 +1008,7 @@ ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !6 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1041,7 +1041,7 @@ ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP6]], i32 1, <8 x i1> [[TMP0]]) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !8 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1418,7 +1418,7 @@ ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END:%.*]], 
label [[VECTOR_BODY]], !llvm.loop !7 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1429,29 +1429,169 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE31:%.*]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE31]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* +; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> 
[[INTERLEAVED_MASK]], <16 x i8> undef) +; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP4]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef) ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC1]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC1]], <8 x i8> [[STRIDED_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = sub <8 x i8> zeroinitializer, [[TMP6]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i32 [[TMP4]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP10]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) +; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = or <8 x i32> [[TMP1]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC1]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i8> [[STRIDED_VEC1]], <8 x i8> [[STRIDED_VEC]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = 
extractelement <8 x i32> [[TMP1]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP9]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <8 x i8> [[TMP7]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] +; ENABLED_MASKED_STRIDED: pred.store.continue: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if2: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP13]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i8> [[TMP7]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE3]] +; ENABLED_MASKED_STRIDED: pred.store.continue3: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if4: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP17]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = extractelement <8 x i8> [[TMP7]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP19]], i8* [[TMP18]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE5]] +; ENABLED_MASKED_STRIDED: pred.store.continue5: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP20]], label 
[[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if6: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP21]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = extractelement <8 x i8> [[TMP7]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP23]], i8* [[TMP22]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE7]] +; ENABLED_MASKED_STRIDED: pred.store.continue7: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP24]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if8: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP25]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i8> [[TMP7]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP27]], i8* [[TMP26]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE9]] +; ENABLED_MASKED_STRIDED: pred.store.continue9: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if10: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP29]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = extractelement <8 x i8> [[TMP7]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP31]], i8* [[TMP30]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE11]] +; ENABLED_MASKED_STRIDED: pred.store.continue11: 
+; ENABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP32]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if12: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP33]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = extractelement <8 x i8> [[TMP7]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP35]], i8* [[TMP34]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE13]] +; ENABLED_MASKED_STRIDED: pred.store.continue13: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if14: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP37]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i8> [[TMP7]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP39]], i8* [[TMP38]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE15]] +; ENABLED_MASKED_STRIDED: pred.store.continue15: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = sub <8 x i8> zeroinitializer, [[TMP7]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP41]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if16: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = extractelement <8 x i32> [[TMP5]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP42]] +; ENABLED_MASKED_STRIDED-NEXT: 
[[TMP44:%.*]] = extractelement <8 x i8> [[TMP40]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP44]], i8* [[TMP43]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE17]] +; ENABLED_MASKED_STRIDED: pred.store.continue17: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP45]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if18: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = extractelement <8 x i32> [[TMP5]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP46]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = extractelement <8 x i8> [[TMP40]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP48]], i8* [[TMP47]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE19]] +; ENABLED_MASKED_STRIDED: pred.store.continue19: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP49]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if20: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = extractelement <8 x i32> [[TMP5]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP50]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = extractelement <8 x i8> [[TMP40]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP52]], i8* [[TMP51]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE21]] +; ENABLED_MASKED_STRIDED: pred.store.continue21: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if22: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = extractelement <8 
x i32> [[TMP5]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP54]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = extractelement <8 x i8> [[TMP40]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP56]], i8* [[TMP55]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE23]] +; ENABLED_MASKED_STRIDED: pred.store.continue23: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP57]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if24: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = extractelement <8 x i32> [[TMP5]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP58]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP60:%.*]] = extractelement <8 x i8> [[TMP40]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP60]], i8* [[TMP59]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE25]] +; ENABLED_MASKED_STRIDED: pred.store.continue25: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP61:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP61]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if26: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP62:%.*]] = extractelement <8 x i32> [[TMP5]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP62]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP64:%.*]] = extractelement <8 x i8> [[TMP40]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP64]], i8* [[TMP63]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE27]] +; ENABLED_MASKED_STRIDED: pred.store.continue27: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP65:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP65]], label 
[[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if28: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP66:%.*]] = extractelement <8 x i32> [[TMP5]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP66]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP68:%.*]] = extractelement <8 x i8> [[TMP40]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP68]], i8* [[TMP67]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE29]] +; ENABLED_MASKED_STRIDED: pred.store.continue29: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP69:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP69]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31]] +; ENABLED_MASKED_STRIDED: pred.store.if30: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP70:%.*]] = extractelement <8 x i32> [[TMP5]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP71:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP70]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP72:%.*]] = extractelement <8 x i8> [[TMP40]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP72]], i8* [[TMP71]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE31]] +; ENABLED_MASKED_STRIDED: pred.store.continue31: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], -; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !9 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP73:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP73]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP9:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1847,7 +1987,7 @@ ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; 
DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP168:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP168]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !8 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP168]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1865,31 +2005,171 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE33:%.*]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE33]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[INDEX]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> undef, <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> 
@llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef) +; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to <16 x i8>* +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP5]], <8 x i1> undef, <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP6]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef) ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = sub <8 x i8> zeroinitializer, [[TMP8]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i32 [[TMP6]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP8]], <8 x i8> [[TMP9]], <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* 
[[TMP12]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) +; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = or <8 x i32> [[TMP2]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP5]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP11]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = extractelement <8 x i8> [[TMP9]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP13]], i8* [[TMP12]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] +; ENABLED_MASKED_STRIDED: pred.store.continue: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP5]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if4: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP15]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = extractelement <8 x i8> [[TMP9]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP17]], i8* [[TMP16]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE5]] +; ENABLED_MASKED_STRIDED: pred.store.continue5: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = extractelement <8 x i1> [[TMP5]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] +; ENABLED_MASKED_STRIDED: 
pred.store.if6: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP19]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i8> [[TMP9]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP21]], i8* [[TMP20]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE7]] +; ENABLED_MASKED_STRIDED: pred.store.continue7: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP5]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if8: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP23]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = extractelement <8 x i8> [[TMP9]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP25]], i8* [[TMP24]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE9]] +; ENABLED_MASKED_STRIDED: pred.store.continue9: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP5]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if10: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP27]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <8 x i8> [[TMP9]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP29]], i8* [[TMP28]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE11]] +; ENABLED_MASKED_STRIDED: pred.store.continue11: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP5]], i32 5 
+; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP30]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if12: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP31]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i8> [[TMP9]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP33]], i8* [[TMP32]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE13]] +; ENABLED_MASKED_STRIDED: pred.store.continue13: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP5]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if14: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP35]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = extractelement <8 x i8> [[TMP9]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP37]], i8* [[TMP36]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE15]] +; ENABLED_MASKED_STRIDED: pred.store.continue15: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP5]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP38]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if16: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP39]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = extractelement <8 x i8> [[TMP9]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP41]], i8* [[TMP40]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label 
[[PRED_STORE_CONTINUE17]] +; ENABLED_MASKED_STRIDED: pred.store.continue17: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = sub <8 x i8> zeroinitializer, [[TMP9]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = extractelement <8 x i1> [[TMP5]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP43]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if18: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = extractelement <8 x i32> [[TMP7]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP44]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = extractelement <8 x i8> [[TMP42]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP46]], i8* [[TMP45]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE19]] +; ENABLED_MASKED_STRIDED: pred.store.continue19: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = extractelement <8 x i1> [[TMP5]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP47]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if20: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = extractelement <8 x i32> [[TMP7]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP48]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = extractelement <8 x i8> [[TMP42]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP50]], i8* [[TMP49]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE21]] +; ENABLED_MASKED_STRIDED: pred.store.continue21: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP5]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if22: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = extractelement <8 x i32> [[TMP7]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = getelementptr 
inbounds i8, i8* [[Q]], i32 [[TMP52]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = extractelement <8 x i8> [[TMP42]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP54]], i8* [[TMP53]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE23]] +; ENABLED_MASKED_STRIDED: pred.store.continue23: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = extractelement <8 x i1> [[TMP5]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP55]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if24: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = extractelement <8 x i32> [[TMP7]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP56]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = extractelement <8 x i8> [[TMP42]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP58]], i8* [[TMP57]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE25]] +; ENABLED_MASKED_STRIDED: pred.store.continue25: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = extractelement <8 x i1> [[TMP5]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP59]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if26: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP60:%.*]] = extractelement <8 x i32> [[TMP7]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP60]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP62:%.*]] = extractelement <8 x i8> [[TMP42]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP62]], i8* [[TMP61]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE27]] +; ENABLED_MASKED_STRIDED: pred.store.continue27: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP63:%.*]] = extractelement <8 x i1> [[TMP5]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP63]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]] +; ENABLED_MASKED_STRIDED: 
pred.store.if28: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP64:%.*]] = extractelement <8 x i32> [[TMP7]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP64]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP66:%.*]] = extractelement <8 x i8> [[TMP42]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP66]], i8* [[TMP65]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE29]] +; ENABLED_MASKED_STRIDED: pred.store.continue29: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP67:%.*]] = extractelement <8 x i1> [[TMP5]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP67]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if30: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP68:%.*]] = extractelement <8 x i32> [[TMP7]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP69:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP68]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP70:%.*]] = extractelement <8 x i8> [[TMP42]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP70]], i8* [[TMP69]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE31]] +; ENABLED_MASKED_STRIDED: pred.store.continue31: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP71:%.*]] = extractelement <8 x i1> [[TMP5]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP71]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33]] +; ENABLED_MASKED_STRIDED: pred.store.if32: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP72:%.*]] = extractelement <8 x i32> [[TMP7]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP73:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP72]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP74:%.*]] = extractelement <8 x i8> [[TMP42]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP74]], i8* [[TMP73]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE33]] +; ENABLED_MASKED_STRIDED: pred.store.continue33: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; 
ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], -; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP13]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !10 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP75:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP75]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -2287,7 +2567,7 @@ ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], ; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !9 +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP9:!llvm.loop !.*]] ; DISABLED_MASKED_STRIDED: for.end: ; DISABLED_MASKED_STRIDED-NEXT: ret void ; @@ -2303,30 +2583,169 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[INDEX]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer -; ENABLED_MASKED_STRIDED-NEXT: [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[INDUCTION]], [[BROADCAST_SPLAT]] -; ENABLED_MASKED_STRIDED-NEXT: 
[[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE31:%.*]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE31]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef) +; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP4]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef) ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]] -; 
ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = sub <8 x i8> zeroinitializer, [[TMP6]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i32 [[TMP4]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP10]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = or <8 x i32> [[TMP1]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC1]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i8> [[STRIDED_VEC1]], <8 x i8> [[STRIDED_VEC]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP9]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <8 x i8> [[TMP7]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] +; ENABLED_MASKED_STRIDED: pred.store.continue: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] +; 
ENABLED_MASKED_STRIDED: pred.store.if2: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP13]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i8> [[TMP7]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE3]] +; ENABLED_MASKED_STRIDED: pred.store.continue3: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if4: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP17]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = extractelement <8 x i8> [[TMP7]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP19]], i8* [[TMP18]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE5]] +; ENABLED_MASKED_STRIDED: pred.store.continue5: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if6: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP21]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = extractelement <8 x i8> [[TMP7]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP23]], i8* [[TMP22]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE7]] +; ENABLED_MASKED_STRIDED: pred.store.continue7: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = extractelement <8 x 
i1> [[TMP0]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP24]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if8: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP25]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i8> [[TMP7]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP27]], i8* [[TMP26]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE9]] +; ENABLED_MASKED_STRIDED: pred.store.continue9: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if10: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP29]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = extractelement <8 x i8> [[TMP7]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP31]], i8* [[TMP30]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE11]] +; ENABLED_MASKED_STRIDED: pred.store.continue11: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP32]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if12: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP33]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = extractelement <8 x i8> [[TMP7]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP35]], i8* [[TMP34]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br 
label [[PRED_STORE_CONTINUE13]] +; ENABLED_MASKED_STRIDED: pred.store.continue13: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if14: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP37]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i8> [[TMP7]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP39]], i8* [[TMP38]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE15]] +; ENABLED_MASKED_STRIDED: pred.store.continue15: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = sub <8 x i8> zeroinitializer, [[TMP7]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP41]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if16: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = extractelement <8 x i32> [[TMP5]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP42]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = extractelement <8 x i8> [[TMP40]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP44]], i8* [[TMP43]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE17]] +; ENABLED_MASKED_STRIDED: pred.store.continue17: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP45]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if18: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = extractelement <8 x i32> [[TMP5]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = 
getelementptr inbounds i8, i8* [[Q]], i32 [[TMP46]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = extractelement <8 x i8> [[TMP40]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP48]], i8* [[TMP47]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE19]] +; ENABLED_MASKED_STRIDED: pred.store.continue19: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP49]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if20: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = extractelement <8 x i32> [[TMP5]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP50]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = extractelement <8 x i8> [[TMP40]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP52]], i8* [[TMP51]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE21]] +; ENABLED_MASKED_STRIDED: pred.store.continue21: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if22: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = extractelement <8 x i32> [[TMP5]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP54]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = extractelement <8 x i8> [[TMP40]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP56]], i8* [[TMP55]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE23]] +; ENABLED_MASKED_STRIDED: pred.store.continue23: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP57]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]] +; ENABLED_MASKED_STRIDED: 
pred.store.if24: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = extractelement <8 x i32> [[TMP5]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP58]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP60:%.*]] = extractelement <8 x i8> [[TMP40]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP60]], i8* [[TMP59]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE25]] +; ENABLED_MASKED_STRIDED: pred.store.continue25: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP61:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP61]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if26: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP62:%.*]] = extractelement <8 x i32> [[TMP5]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP62]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP64:%.*]] = extractelement <8 x i8> [[TMP40]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP64]], i8* [[TMP63]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE27]] +; ENABLED_MASKED_STRIDED: pred.store.continue27: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP65:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP65]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if28: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP66:%.*]] = extractelement <8 x i32> [[TMP5]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP66]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP68:%.*]] = extractelement <8 x i8> [[TMP40]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP68]], i8* [[TMP67]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE29]] +; ENABLED_MASKED_STRIDED: pred.store.continue29: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP69:%.*]] = extractelement <8 x i1> 
[[TMP0]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP69]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31]] +; ENABLED_MASKED_STRIDED: pred.store.if30: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP70:%.*]] = extractelement <8 x i32> [[TMP5]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP71:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP70]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP72:%.*]] = extractelement <8 x i8> [[TMP40]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP72]], i8* [[TMP71]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE31]] +; ENABLED_MASKED_STRIDED: pred.store.continue31: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !11 +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP73:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP73]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP11:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ;