diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6652,23 +6652,36 @@ PredicatedScalarEvolution &PSE, const Loop *TheLoop) { - auto *Gep = dyn_cast(Ptr); - if (!Gep) - return nullptr; - // We are looking for a gep with all loop invariant indices except for one // which should be an induction variable. auto SE = PSE.getSE(); - unsigned NumOperands = Gep->getNumOperands(); - for (unsigned i = 1; i < NumOperands; ++i) { - Value *Opd = Gep->getOperand(i); - if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) && - !Legal->isInductionVariable(Opd)) + const auto *PtrSCEV = SE->getSCEV(Ptr); + if (isa(PtrSCEV)) { + if (!SE->isLoopInvariant(PtrSCEV, TheLoop)) return nullptr; + + } else if (isa(PtrSCEV)) { + const auto *GepNary = cast(PtrSCEV); + for (unsigned op = 0; op < GepNary->getNumOperands(); op++) { + if (!SE->isLoopInvariant(GepNary->getOperand(op), TheLoop)) + return nullptr; + } + } else { + auto *Gep = dyn_cast(Ptr); + if (!Gep) + return nullptr; + + unsigned NumOperands = Gep->getNumOperands(); + for (unsigned i = 1; i < NumOperands; ++i) { + Value *Opd = Gep->getOperand(i); + if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) && + !Legal->isInductionVariable(Opd)) + return nullptr; + } } // Now we know we have a GEP ptr, %inv, %ind, %inv. return the Ptr SCEV. 
- return PSE.getSCEV(Ptr); + return PtrSCEV; } static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) { @@ -7090,6 +7103,11 @@ InstructionCost ScalarizationCost = getMemInstScalarizationCost(&I, VF) * NumAccesses; + LLVM_DEBUG(dbgs() << "Instruction" << I << "\n"); + LLVM_DEBUG(dbgs() << "Interleaving cost : " << InterleaveCost << "\n"); + LLVM_DEBUG(dbgs() << "GatherScatter cost: " << GatherScatterCost << "\n"); + LLVM_DEBUG(dbgs() << "Scalarization cost: " << ScalarizationCost << "\n"); + // Choose better solution for the current VF, // write down this decision and use it during vectorization. InstructionCost Cost; @@ -7098,12 +7116,15 @@ InterleaveCost < ScalarizationCost) { Decision = CM_Interleave; Cost = InterleaveCost; + LLVM_DEBUG(dbgs() << "Widening decision: CM_Interleave\n"); } else if (GatherScatterCost < ScalarizationCost) { Decision = CM_GatherScatter; Cost = GatherScatterCost; + LLVM_DEBUG(dbgs() << "Widening decision: CM_GatherScatter\n"); } else { Decision = CM_Scalarize; Cost = ScalarizationCost; + LLVM_DEBUG(dbgs() << "Widening decision: CM_Scalarize\n"); } // If the instructions belongs to an interleave group, the whole group // receives the same decision. The whole group receives the cost, but diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll @@ -13,7 +13,7 @@ ; We don't want to vectorize most loops containing gathers because they are ; expensive. ; Make sure we don't vectorize it. 
-; CHECK-NOT: x float> +; CHECK: x float> define void @_Z4testmm(i64 %size, i64 %offset) { entry: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll @@ -15,8 +15,7 @@ ; expensive. This function represents a point where vectorization starts to ; become beneficial. ; Make sure we are conservative and don't vectorize it. -; CHECK-NOT: <2 x float> -; CHECK-NOT: <4 x float> +; CHECK: <4 x float> define void @_Z4testmm(i32 %size, i32 %offset) { entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll --- a/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll @@ -10,11 +10,7 @@ @g_ = global i8 0, align 1 @b_ = global i8 0, align 1 -; We don't want to vectorize most loops containing gathers because they are -; expensive. This function represents a point where vectorization starts to -; become beneficial. -; Make sure we are conservative and don't vectorize it. 
-; CHECK-NOT: x float> +; CHECK: x float> define void @_Z4testmm(i64 %size, i64 %offset) { entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/scalarization-fix.ll b/llvm/test/Transforms/LoopVectorize/X86/scalarization-fix.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/scalarization-fix.ll @@ -0,0 +1,71 @@ +; RUN: opt -loop-vectorize -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s +; +; void fun(int* restrict a, int *b, int N) { +; for (int i = 1; i < N; i += 2) +; a[i] = a[i - 1] + b[i]; +; } +; + +;CHECK: Instruction %2 = load i32, i32* %arrayidx, align 4, !tbaa !2 +;CHECK-NEXT: Interleaving cost : 10 +;CHECK-NEXT: GatherScatter cost: 2147483647 +;CHECK-NEXT: Scalarization cost: 9 +;CHECK-NEXT: Widening decision: CM_Scalarize + +;CHECK: Instruction %2 = load i32, i32* %arrayidx, align 4, !tbaa !2 +;CHECK-NEXT: Interleaving cost : 20 +;CHECK-NEXT: GatherScatter cost: 2147483647 +;CHECK-NEXT: Scalarization cost: 19 +;CHECK-NEXT: Widening decision: CM_Scalarize + +;CHECK: LV: Vector loop of width 2 costs: 15. +;CHECK: LV: Vector loop of width 4 costs: 15. 
+ + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nofree noinline norecurse nounwind uwtable +define dso_local void @fun(i32* noalias nocapture %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { +entry: + %cmp12 = icmp sgt i32 %N, 1 + br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %0 = zext i32 %N to i64 + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 1, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %1 = add nsw i64 %indvars.iv, -1 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %1 + %2 = load i32, i32* %arrayidx, align 4, !tbaa !2 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %3 = load i32, i32* %arrayidx2, align 4, !tbaa !2 + %add = add nsw i32 %3, %2 + %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + store i32 %add, i32* %arrayidx4, align 4, !tbaa !2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 + %cmp = icmp ult i64 %indvars.iv.next, %0 + br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !6 +} + +attributes #0 = { nofree noinline norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang 
version 12.0.0 (CLANG: Jenkins CPUPC_Mirror_To_Staging_Merge-Build#892)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = distinct !{!6, !7} +!7 = !{!"llvm.loop.mustprogress"} diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -1429,29 +1429,169 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE31:%.*]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE31]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* +; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], 
i32 [[TMP2]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison) +; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP4]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison) ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC1]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC1]], <8 x i8> [[STRIDED_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = sub <8 x i8> zeroinitializer, [[TMP6]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i32 [[TMP4]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP10]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) +; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = or <8 x i32> [[TMP1]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC1]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = select 
<8 x i1> [[TMP6]], <8 x i8> [[STRIDED_VEC1]], <8 x i8> [[STRIDED_VEC]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP9]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <8 x i8> [[TMP7]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] +; ENABLED_MASKED_STRIDED: pred.store.continue: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if2: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP13]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i8> [[TMP7]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE3]] +; ENABLED_MASKED_STRIDED: pred.store.continue3: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if4: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP17]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = extractelement <8 x i8> 
[[TMP7]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP19]], i8* [[TMP18]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE5]] +; ENABLED_MASKED_STRIDED: pred.store.continue5: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if6: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP21]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = extractelement <8 x i8> [[TMP7]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP23]], i8* [[TMP22]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE7]] +; ENABLED_MASKED_STRIDED: pred.store.continue7: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP24]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if8: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP25]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i8> [[TMP7]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP27]], i8* [[TMP26]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE9]] +; ENABLED_MASKED_STRIDED: pred.store.continue9: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if10: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: 
[[TMP30:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP29]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = extractelement <8 x i8> [[TMP7]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP31]], i8* [[TMP30]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE11]] +; ENABLED_MASKED_STRIDED: pred.store.continue11: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP32]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if12: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP33]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = extractelement <8 x i8> [[TMP7]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP35]], i8* [[TMP34]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE13]] +; ENABLED_MASKED_STRIDED: pred.store.continue13: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if14: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP37]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i8> [[TMP7]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP39]], i8* [[TMP38]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE15]] +; ENABLED_MASKED_STRIDED: pred.store.continue15: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = sub <8 x i8> zeroinitializer, [[TMP7]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 
[[TMP41]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if16: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = extractelement <8 x i32> [[TMP5]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP42]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = extractelement <8 x i8> [[TMP40]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP44]], i8* [[TMP43]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE17]] +; ENABLED_MASKED_STRIDED: pred.store.continue17: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP45]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if18: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = extractelement <8 x i32> [[TMP5]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP46]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = extractelement <8 x i8> [[TMP40]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP48]], i8* [[TMP47]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE19]] +; ENABLED_MASKED_STRIDED: pred.store.continue19: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP49]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if20: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = extractelement <8 x i32> [[TMP5]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP50]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = extractelement <8 x i8> [[TMP40]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP52]], i8* [[TMP51]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE21]] +; 
ENABLED_MASKED_STRIDED: pred.store.continue21: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if22: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = extractelement <8 x i32> [[TMP5]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP54]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = extractelement <8 x i8> [[TMP40]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP56]], i8* [[TMP55]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE23]] +; ENABLED_MASKED_STRIDED: pred.store.continue23: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP57]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if24: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = extractelement <8 x i32> [[TMP5]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP58]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP60:%.*]] = extractelement <8 x i8> [[TMP40]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP60]], i8* [[TMP59]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE25]] +; ENABLED_MASKED_STRIDED: pred.store.continue25: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP61:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP61]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if26: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP62:%.*]] = extractelement <8 x i32> [[TMP5]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP62]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP64:%.*]] = extractelement <8 x i8> 
[[TMP40]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP64]], i8* [[TMP63]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE27]] +; ENABLED_MASKED_STRIDED: pred.store.continue27: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP65:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP65]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if28: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP66:%.*]] = extractelement <8 x i32> [[TMP5]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP66]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP68:%.*]] = extractelement <8 x i8> [[TMP40]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP68]], i8* [[TMP67]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE29]] +; ENABLED_MASKED_STRIDED: pred.store.continue29: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP69:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP69]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31]] +; ENABLED_MASKED_STRIDED: pred.store.if30: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP70:%.*]] = extractelement <8 x i32> [[TMP5]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP71:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP70]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP72:%.*]] = extractelement <8 x i8> [[TMP40]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP72]], i8* [[TMP71]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE31]] +; ENABLED_MASKED_STRIDED: pred.store.continue31: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], -; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP9:!llvm.loop !.*]] +; 
ENABLED_MASKED_STRIDED-NEXT: [[TMP73:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP73]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], [[LOOP9:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -1865,31 +2005,171 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE33:%.*]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE33]] ] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[INDEX]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison) +; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; 
ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to <16 x i8>* +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP5]], <8 x i1> poison, <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP6]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison) ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = sub <8 x i8> zeroinitializer, [[TMP8]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i32 [[TMP6]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP8]], <8 x i8> [[TMP9]], <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP12]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) +; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = or <8 x i32> [[TMP2]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = icmp slt <8 x i8> 
[[STRIDED_VEC]], [[STRIDED_VEC3]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP5]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP11]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = extractelement <8 x i8> [[TMP9]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP13]], i8* [[TMP12]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] +; ENABLED_MASKED_STRIDED: pred.store.continue: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP5]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if4: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP15]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = extractelement <8 x i8> [[TMP9]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP17]], i8* [[TMP16]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE5]] +; ENABLED_MASKED_STRIDED: pred.store.continue5: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = extractelement <8 x i1> [[TMP5]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if6: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* 
[[Q]], i32 [[TMP19]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i8> [[TMP9]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP21]], i8* [[TMP20]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE7]] +; ENABLED_MASKED_STRIDED: pred.store.continue7: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP5]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if8: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP23]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = extractelement <8 x i8> [[TMP9]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP25]], i8* [[TMP24]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE9]] +; ENABLED_MASKED_STRIDED: pred.store.continue9: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP5]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if10: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP27]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <8 x i8> [[TMP9]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP29]], i8* [[TMP28]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE11]] +; ENABLED_MASKED_STRIDED: pred.store.continue11: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP5]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP30]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if12: +; 
ENABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP31]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i8> [[TMP9]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP33]], i8* [[TMP32]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE13]] +; ENABLED_MASKED_STRIDED: pred.store.continue13: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP5]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if14: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP35]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = extractelement <8 x i8> [[TMP9]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP37]], i8* [[TMP36]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE15]] +; ENABLED_MASKED_STRIDED: pred.store.continue15: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP5]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP38]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if16: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP39]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = extractelement <8 x i8> [[TMP9]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP41]], i8* [[TMP40]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE17]] +; ENABLED_MASKED_STRIDED: pred.store.continue17: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = sub <8 x i8> zeroinitializer, [[TMP9]] +; 
ENABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = extractelement <8 x i1> [[TMP5]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP43]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if18: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = extractelement <8 x i32> [[TMP7]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP44]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = extractelement <8 x i8> [[TMP42]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP46]], i8* [[TMP45]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE19]] +; ENABLED_MASKED_STRIDED: pred.store.continue19: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = extractelement <8 x i1> [[TMP5]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP47]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if20: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = extractelement <8 x i32> [[TMP7]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP48]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = extractelement <8 x i8> [[TMP42]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP50]], i8* [[TMP49]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE21]] +; ENABLED_MASKED_STRIDED: pred.store.continue21: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP5]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if22: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = extractelement <8 x i32> [[TMP7]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP52]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = extractelement <8 x i8> [[TMP42]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 
[[TMP54]], i8* [[TMP53]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE23]] +; ENABLED_MASKED_STRIDED: pred.store.continue23: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = extractelement <8 x i1> [[TMP5]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP55]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if24: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = extractelement <8 x i32> [[TMP7]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP56]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = extractelement <8 x i8> [[TMP42]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP58]], i8* [[TMP57]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE25]] +; ENABLED_MASKED_STRIDED: pred.store.continue25: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = extractelement <8 x i1> [[TMP5]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP59]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if26: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP60:%.*]] = extractelement <8 x i32> [[TMP7]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP60]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP62:%.*]] = extractelement <8 x i8> [[TMP42]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP62]], i8* [[TMP61]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE27]] +; ENABLED_MASKED_STRIDED: pred.store.continue27: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP63:%.*]] = extractelement <8 x i1> [[TMP5]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP63]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if28: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP64:%.*]] = extractelement <8 x i32> [[TMP7]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, 
i8* [[Q]], i32 [[TMP64]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP66:%.*]] = extractelement <8 x i8> [[TMP42]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP66]], i8* [[TMP65]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE29]] +; ENABLED_MASKED_STRIDED: pred.store.continue29: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP67:%.*]] = extractelement <8 x i1> [[TMP5]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP67]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if30: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP68:%.*]] = extractelement <8 x i32> [[TMP7]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP69:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP68]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP70:%.*]] = extractelement <8 x i8> [[TMP42]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP70]], i8* [[TMP69]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE31]] +; ENABLED_MASKED_STRIDED: pred.store.continue31: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP71:%.*]] = extractelement <8 x i1> [[TMP5]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP71]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33]] +; ENABLED_MASKED_STRIDED: pred.store.if32: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP72:%.*]] = extractelement <8 x i32> [[TMP7]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP73:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP72]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP74:%.*]] = extractelement <8 x i8> [[TMP42]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP74]], i8* [[TMP73]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE33]] +; ENABLED_MASKED_STRIDED: pred.store.continue33: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], -; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; 
ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP13]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP75:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP75]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ; @@ -2303,30 +2583,169 @@ ; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer ; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; ENABLED_MASKED_STRIDED: vector.body: -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[INDEX]], i32 0 -; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer -; ENABLED_MASKED_STRIDED-NEXT: [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], -; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[INDUCTION]], [[BROADCAST_SPLAT]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE31:%.*]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE31]] ] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: 
[[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison) +; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP4]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison) ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> -; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = sub <8 x i8> zeroinitializer, [[TMP6]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1 -; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i32 [[TMP4]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>* -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> -; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP10]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) +; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x 
i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = or <8 x i32> [[TMP1]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC1]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i8> [[STRIDED_VEC1]], <8 x i8> [[STRIDED_VEC]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP9]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <8 x i8> [[TMP7]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] +; ENABLED_MASKED_STRIDED: pred.store.continue: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if2: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP13]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i8> [[TMP7]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE3]] +; ENABLED_MASKED_STRIDED: pred.store.continue3: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; ENABLED_MASKED_STRIDED: 
pred.store.if4: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP17]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = extractelement <8 x i8> [[TMP7]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP19]], i8* [[TMP18]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE5]] +; ENABLED_MASKED_STRIDED: pred.store.continue5: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if6: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP21]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = extractelement <8 x i8> [[TMP7]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP23]], i8* [[TMP22]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE7]] +; ENABLED_MASKED_STRIDED: pred.store.continue7: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP24]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if8: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP25]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i8> [[TMP7]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP27]], i8* [[TMP26]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE9]] +; ENABLED_MASKED_STRIDED: pred.store.continue9: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 +; 
ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if10: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP29]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = extractelement <8 x i8> [[TMP7]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP31]], i8* [[TMP30]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE11]] +; ENABLED_MASKED_STRIDED: pred.store.continue11: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP32]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if12: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP33]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = extractelement <8 x i8> [[TMP7]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP35]], i8* [[TMP34]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE13]] +; ENABLED_MASKED_STRIDED: pred.store.continue13: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if14: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP37]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i8> [[TMP7]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP39]], i8* [[TMP38]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label 
[[PRED_STORE_CONTINUE15]] +; ENABLED_MASKED_STRIDED: pred.store.continue15: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = sub <8 x i8> zeroinitializer, [[TMP7]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP41]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if16: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = extractelement <8 x i32> [[TMP5]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP42]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = extractelement <8 x i8> [[TMP40]], i32 0 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP44]], i8* [[TMP43]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE17]] +; ENABLED_MASKED_STRIDED: pred.store.continue17: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP45]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if18: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = extractelement <8 x i32> [[TMP5]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP46]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = extractelement <8 x i8> [[TMP40]], i32 1 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP48]], i8* [[TMP47]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE19]] +; ENABLED_MASKED_STRIDED: pred.store.continue19: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP49]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if20: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = extractelement <8 x i32> [[TMP5]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = getelementptr 
inbounds i8, i8* [[Q]], i32 [[TMP50]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = extractelement <8 x i8> [[TMP40]], i32 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP52]], i8* [[TMP51]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE21]] +; ENABLED_MASKED_STRIDED: pred.store.continue21: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if22: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = extractelement <8 x i32> [[TMP5]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP54]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = extractelement <8 x i8> [[TMP40]], i32 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP56]], i8* [[TMP55]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE23]] +; ENABLED_MASKED_STRIDED: pred.store.continue23: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP57]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if24: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = extractelement <8 x i32> [[TMP5]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP58]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP60:%.*]] = extractelement <8 x i8> [[TMP40]], i32 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP60]], i8* [[TMP59]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE25]] +; ENABLED_MASKED_STRIDED: pred.store.continue25: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP61:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP61]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]] +; ENABLED_MASKED_STRIDED: 
pred.store.if26: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP62:%.*]] = extractelement <8 x i32> [[TMP5]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP62]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP64:%.*]] = extractelement <8 x i8> [[TMP40]], i32 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP64]], i8* [[TMP63]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE27]] +; ENABLED_MASKED_STRIDED: pred.store.continue27: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP65:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP65]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]] +; ENABLED_MASKED_STRIDED: pred.store.if28: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP66:%.*]] = extractelement <8 x i32> [[TMP5]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP66]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP68:%.*]] = extractelement <8 x i8> [[TMP40]], i32 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP68]], i8* [[TMP67]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE29]] +; ENABLED_MASKED_STRIDED: pred.store.continue29: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP69:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP69]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31]] +; ENABLED_MASKED_STRIDED: pred.store.if30: +; ENABLED_MASKED_STRIDED-NEXT: [[TMP70:%.*]] = extractelement <8 x i32> [[TMP5]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP71:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP70]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP72:%.*]] = extractelement <8 x i8> [[TMP40]], i32 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP72]], i8* [[TMP71]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE31]] +; ENABLED_MASKED_STRIDED: pred.store.continue31: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; 
ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP11:!llvm.loop !.*]] +; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], +; ENABLED_MASKED_STRIDED-NEXT: [[TMP73:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP73]], label [[FOR_END]], label [[VECTOR_BODY]], [[LOOP11:!llvm.loop !.*]] ; ENABLED_MASKED_STRIDED: for.end: ; ENABLED_MASKED_STRIDED-NEXT: ret void ;