diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -37,7 +37,15 @@ const RISCVSubtarget *getST() const { return ST; } const RISCVTargetLowering *getTLI() const { return TLI; } - unsigned getMaxVLFor(VectorType *Ty); + /// This function returns an estimate for VL to be used in VL based terms + /// of the cost model. For fixed length vectors, this is simply the + /// vector length. For scalable vectors, we return results consistent + /// with getVScaleForTuning under the assumption that clients are also + /// using that when comparing costs between scalar and vector representation. + /// This does unfortunately mean that we can both undershoot and overshot + /// the true cost significantly if getVScaleForTuning is wildly off for the + /// actual target hardware. + unsigned getEstimatedVLFor(VectorType *Ty); public: explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -240,12 +240,12 @@ Alignment, CostKind, I); // Cost is proportional to the number of memory operations implied. For - // scalable vectors, we use an upper bound on that number since we don't + // scalable vectors, we use an estimate on that number since we don't // know exactly what VL will be. auto &VTy = *cast(DataTy); InstructionCost MemOpCost = getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind, I); - unsigned NumLoads = getMaxVLFor(&VTy); + unsigned NumLoads = getEstimatedVLFor(&VTy); return NumLoads * MemOpCost; } @@ -343,12 +343,12 @@ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); } -unsigned RISCVTTIImpl::getMaxVLFor(VectorType *Ty) { +unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) { if (isa(Ty)) { const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType()); const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue(); - const unsigned VectorBitsMax = ST->getRealMaxVLen(); - return RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); + const unsigned VectorBits = *getVScaleForTuning() * RISCV::RVVBitsPerBlock; + return RISCVTargetLowering::computeVLMAX(VectorBits, EltSize, MinSize); } return cast(Ty)->getNumElements(); } @@ -372,7 +372,7 @@ // IR Reduction is composed by two vmv and one rvv reduction instruction. InstructionCost BaseCost = 2; - unsigned VL = getMaxVLFor(Ty); + unsigned VL = getEstimatedVLFor(Ty); return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL); } @@ -401,7 +401,7 @@ // IR Reduction is composed by two vmv and one rvv reduction instruction. InstructionCost BaseCost = 2; - unsigned VL = getMaxVLFor(Ty); + unsigned VL = getEstimatedVLFor(Ty); if (TTI::requiresOrderedReduction(FMF)) return (LT.first - 1) + BaseCost + VL; return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL); diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll --- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll @@ -6,7 +6,7 @@ define half @vreduce_fadd_nxv1f16( %v, half %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv1f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, %v) @@ -15,7 +15,7 @@ define half @vreduce_ord_fadd_nxv1f16( %v, half %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1026 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, %v) @@ -26,7 +26,7 @@ define half @vreduce_fadd_nxv2f16( %v, half %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv2f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) @@ -35,7 +35,7 @@ define half @vreduce_ord_fadd_nxv2f16( %v, half %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2050 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) @@ -46,7 +46,7 @@ define half @vreduce_fadd_nxv4f16( %v, half %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv4f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, %v) @@ -55,7 +55,7 @@ define half @vreduce_ord_fadd_nxv4f16( %v, half %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4098 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, %v) @@ -66,7 +66,7 @@ define float @vreduce_fadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv1f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) @@ -75,7 +75,7 @@ define float @vreduce_ord_fadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1026 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) @@ -85,7 +85,7 @@ define float @vreduce_fwadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv1f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %e = fpext %v to @@ -96,7 +96,7 @@ define float @vreduce_ord_fwadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 1026 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %e = fpext %v to @@ -108,7 +108,7 @@ define float @vreduce_fadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv2f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, %v) @@ -117,7 +117,7 @@ define float @vreduce_ord_fadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2050 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, %v) @@ -127,7 +127,7 @@ define float @vreduce_fwadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv2f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %e = fpext %v to @@ -138,7 +138,7 @@ define float @vreduce_ord_fwadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 2050 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %e = fpext %v to @@ -150,7 +150,7 @@ define float @vreduce_fadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, %v) @@ -159,7 +159,7 @@ define float @vreduce_ord_fadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4098 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, %v) @@ -169,7 +169,7 @@ define float @vreduce_fwadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv4f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %e = fpext %v to @@ -180,7 +180,7 @@ define float @vreduce_ord_fwadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 4098 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %e = fpext %v to @@ -192,7 +192,7 @@ define double @vreduce_fadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv1f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, %v) @@ -201,7 +201,7 @@ define double @vreduce_ord_fadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1026 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, %v) @@ -211,7 +211,7 @@ define double @vreduce_fwadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv1f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %e = fpext %v to @@ -222,7 +222,7 @@ define double @vreduce_ord_fwadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 1026 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %e = fpext %v to @@ -234,7 +234,7 @@ define double @vreduce_fadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv2f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, %v) @@ -243,7 +243,7 @@ define double @vreduce_ord_fadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2050 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, %v) @@ -253,7 +253,7 @@ define double @vreduce_fwadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv2f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %e = fpext %v to @@ -264,7 +264,7 @@ define double @vreduce_ord_fwadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 2050 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %e = fpext %v to @@ -276,7 +276,7 @@ define double @vreduce_fadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv4f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, %v) @@ -285,7 +285,7 @@ define double @vreduce_ord_fadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 4098 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, %v) @@ -295,7 +295,7 @@ define double @vreduce_fwadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv4f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %e = fpext %v to @@ -306,7 +306,7 @@ define double @vreduce_ord_fwadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 4098 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %e = fpext %v to @@ -318,7 +318,7 @@ define half @vreduce_fmin_nxv1f16( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmin.nxv1f16( %v) @@ -327,7 +327,7 @@ define half @vreduce_fmin_nxv1f16_nonans( %v) #0 { ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16( %v) @@ -336,7 +336,7 @@ define half @vreduce_fmin_nxv1f16_nonans_noinfs( %v) #1 { ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16( %v) @@ -347,7 +347,7 @@ define half @vreduce_fmin_nxv2f16( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv2f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmin.nxv2f16( %v) @@ -358,7 +358,7 @@ define half @vreduce_fmin_nxv4f16( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv4f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmin.nxv4f16( %v) @@ -369,7 +369,7 @@ define half @vreduce_fmin_nxv64f16( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv64f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmin.nxv64f16( %v) @@ -380,7 +380,7 @@ define float @vreduce_fmin_nxv1f32( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmin.nxv1f32( %v) @@ -389,7 +389,7 @@ define float @vreduce_fmin_nxv1f32_nonans( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32( %v) @@ -398,7 +398,7 @@ define float @vreduce_fmin_nxv1f32_nonans_noinfs( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32( %v) @@ -409,7 +409,7 @@ define float @vreduce_fmin_nxv2f32( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv2f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmin.nxv2f32( %v) @@ -420,7 +420,7 @@ define float @vreduce_fmin_nxv4f32( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmin.nxv4f32( %v) @@ -431,7 +431,7 @@ define float @vreduce_fmin_nxv32f32( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv32f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmin.nxv32f32( %v) @@ -442,7 +442,7 @@ define double @vreduce_fmin_nxv1f64( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmin.nxv1f64( %v) @@ -451,7 +451,7 @@ define double @vreduce_fmin_nxv1f64_nonans( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64( %v) @@ -460,7 +460,7 @@ define double @vreduce_fmin_nxv1f64_nonans_noinfs( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64( %v) @@ -471,7 +471,7 @@ define double @vreduce_fmin_nxv2f64( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv2f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmin.nxv2f64( %v) @@ -482,7 +482,7 @@ define double @vreduce_fmin_nxv4f64( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv4f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmin.nxv4f64( %v) @@ -493,7 +493,7 @@ define double @vreduce_fmin_nxv16f64( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv16f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmin.nxv16f64( %v) @@ -504,7 +504,7 @@ define half @vreduce_fmax_nxv1f16( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmax.nxv1f16( %v) @@ -513,7 +513,7 @@ define half @vreduce_fmax_nxv1f16_nonans( %v) #0 { ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16( %v) @@ -522,7 +522,7 @@ define half @vreduce_fmax_nxv1f16_nonans_noinfs( %v) #1 { ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16( %v) @@ -533,7 +533,7 @@ define half @vreduce_fmax_nxv2f16( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv2f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmax.nxv2f16( %v) @@ -544,7 +544,7 @@ define half @vreduce_fmax_nxv4f16( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv4f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmax.nxv4f16( %v) @@ -555,7 +555,7 @@ define half @vreduce_fmax_nxv64f16( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv64f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmax.nxv64f16( %v) @@ -566,7 +566,7 @@ define float @vreduce_fmax_nxv1f32( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmax.nxv1f32( %v) @@ -575,7 +575,7 @@ define float @vreduce_fmax_nxv1f32_nonans( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32( %v) @@ -584,7 +584,7 @@ define float @vreduce_fmax_nxv1f32_nonans_noinfs( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32( %v) @@ -595,7 +595,7 @@ define float @vreduce_fmax_nxv2f32( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv2f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmax.nxv2f32( %v) @@ -606,7 +606,7 @@ define float @vreduce_fmax_nxv4f32( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmax.nxv4f32( %v) @@ -617,7 +617,7 @@ define float @vreduce_fmax_nxv32f32( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv32f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmax.nxv32f32( %v) @@ -628,7 +628,7 @@ define double @vreduce_fmax_nxv1f64( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmax.nxv1f64( %v) @@ -637,7 +637,7 @@ define double @vreduce_fmax_nxv1f64_nonans( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64( %v) @@ -646,7 +646,7 @@ define double @vreduce_fmax_nxv1f64_nonans_noinfs( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64( %v) @@ -657,7 +657,7 @@ define double @vreduce_fmax_nxv2f64( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv2f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmax.nxv2f64( %v) @@ -668,7 +668,7 @@ define double @vreduce_fmax_nxv4f64( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv4f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmax.nxv4f64( %v) @@ -679,7 +679,7 @@ define double @vreduce_fmax_nxv16f64( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv16f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmax.nxv16f64( %v) @@ -688,7 +688,7 @@ define float @vreduce_nsz_fadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_nsz_fadd_nxv1f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll --- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll @@ -6,7 +6,7 @@ define signext i8 @vreduce_add_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_add_nxv1i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.add.nxv1i8( %v) @@ -17,7 +17,7 @@ define signext i8 @vreduce_umax_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv1i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.umax.nxv1i8( %v) @@ -28,7 +28,7 @@ define signext i8 @vreduce_smax_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv1i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.smax.nxv1i8( %v) @@ -39,7 +39,7 @@ define signext i8 @vreduce_umin_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv1i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.umin.nxv1i8( %v) @@ -50,7 +50,7 @@ define signext i8 @vreduce_smin_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv1i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.smin.nxv1i8( %v) @@ -61,7 +61,7 @@ define signext i8 @vreduce_and_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_and_nxv1i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.and.nxv1i8( %v) @@ -72,7 +72,7 @@ define signext i8 @vreduce_or_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_or_nxv1i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.or.nxv1i8( %v) @@ -83,7 +83,7 @@ define signext i8 @vreduce_xor_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv1i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.xor.nxv1i8( %v) @@ -94,7 +94,7 @@ define signext i8 @vreduce_add_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_add_nxv2i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.add.nxv2i8( %v) @@ -105,7 +105,7 @@ define signext i8 @vreduce_umax_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv2i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.umax.nxv2i8( %v) @@ -116,7 +116,7 @@ define signext i8 @vreduce_smax_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv2i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.smax.nxv2i8( %v) @@ -127,7 +127,7 @@ define signext i8 @vreduce_umin_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv2i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.umin.nxv2i8( %v) @@ -138,7 +138,7 @@ define signext i8 @vreduce_smin_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv2i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.smin.nxv2i8( %v) @@ -149,7 +149,7 @@ define signext i8 @vreduce_and_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_and_nxv2i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.and.nxv2i8( %v) @@ -160,7 +160,7 @@ define signext i8 @vreduce_or_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_or_nxv2i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.or.nxv2i8( %v) @@ -171,7 +171,7 @@ define signext i8 @vreduce_xor_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv2i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.xor.nxv2i8( %v) @@ -182,7 +182,7 @@ define signext i8 @vreduce_add_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_add_nxv4i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.add.nxv4i8( %v) @@ -193,7 +193,7 @@ define signext i8 @vreduce_umax_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv4i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.umax.nxv4i8( %v) @@ -204,7 +204,7 @@ define signext i8 @vreduce_smax_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv4i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.smax.nxv4i8( %v) @@ -215,7 +215,7 @@ define signext i8 @vreduce_umin_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv4i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.umin.nxv4i8( %v) @@ -226,7 +226,7 @@ define signext i8 @vreduce_smin_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv4i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.smin.nxv4i8( %v) @@ -237,7 +237,7 @@ define signext i8 @vreduce_and_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_and_nxv4i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.and.nxv4i8( %v) @@ -248,7 +248,7 @@ define signext i8 @vreduce_or_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_or_nxv4i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.or.nxv4i8( %v) @@ -259,7 +259,7 @@ define signext i8 @vreduce_xor_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv4i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.xor.nxv4i8( %v) @@ -270,7 +270,7 @@ define signext i16 @vreduce_add_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_add_nxv1i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.add.nxv1i16( %v) @@ -280,7 +280,7 @@ define signext i16 @vwreduce_add_nxv1i8( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv1i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %e = sext %v to @@ -291,7 +291,7 @@ define signext i16 @vwreduce_uadd_nxv1i8( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv1i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %e = sext %v to @@ -303,7 +303,7 @@ define signext i16 @vreduce_umax_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv1i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.umax.nxv1i16( %v) @@ -314,7 +314,7 @@ define signext i16 @vreduce_smax_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv1i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.smax.nxv1i16( %v) @@ -325,7 +325,7 @@ define signext i16 @vreduce_umin_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv1i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.umin.nxv1i16( %v) @@ -336,7 +336,7 @@ define signext i16 @vreduce_smin_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv1i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.smin.nxv1i16( %v) @@ -347,7 +347,7 @@ define signext i16 @vreduce_and_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_and_nxv1i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.and.nxv1i16( %v) @@ -358,7 +358,7 @@ define signext i16 @vreduce_or_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_or_nxv1i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.or.nxv1i16( %v) @@ -369,7 +369,7 @@ define signext i16 @vreduce_xor_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv1i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.xor.nxv1i16( %v) @@ -380,7 +380,7 @@ define signext i16 @vreduce_add_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_add_nxv2i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.add.nxv2i16( %v) @@ -390,7 +390,7 @@ define signext i16 @vwreduce_add_nxv2i8( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv2i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %e = sext %v to @@ -401,7 +401,7 @@ define signext i16 @vwreduce_uadd_nxv2i8( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv2i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %e = sext %v to @@ -413,7 +413,7 @@ define signext i16 @vreduce_umax_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv2i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.umax.nxv2i16( %v) @@ -424,7 +424,7 @@ define signext i16 @vreduce_smax_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv2i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.smax.nxv2i16( %v) @@ -435,7 +435,7 @@ define signext i16 @vreduce_umin_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv2i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.umin.nxv2i16( %v) @@ -446,7 +446,7 @@ define signext i16 @vreduce_smin_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv2i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.smin.nxv2i16( %v) @@ -457,7 +457,7 @@ define signext i16 @vreduce_and_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_and_nxv2i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.and.nxv2i16( %v) @@ -468,7 +468,7 @@ define signext i16 @vreduce_or_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_or_nxv2i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.or.nxv2i16( %v) @@ -479,7 +479,7 @@ define signext i16 @vreduce_xor_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv2i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.xor.nxv2i16( %v) @@ -490,7 +490,7 @@ define signext i16 @vreduce_add_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_add_nxv4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.add.nxv4i16( %v) @@ -500,7 +500,7 @@ define signext i16 @vwreduce_add_nxv4i8( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv4i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %e = sext %v to @@ -511,7 +511,7 @@ define signext i16 @vwreduce_uadd_nxv4i8( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv4i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %e = sext %v to @@ -523,7 +523,7 @@ define signext i16 @vreduce_umax_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.umax.nxv4i16( %v) @@ -534,7 +534,7 @@ define signext i16 @vreduce_smax_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.smax.nxv4i16( %v) @@ -545,7 +545,7 @@ define signext i16 @vreduce_umin_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.umin.nxv4i16( %v) @@ -556,7 +556,7 @@ define signext i16 @vreduce_smin_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.smin.nxv4i16( %v) @@ -567,7 +567,7 @@ define signext i16 @vreduce_and_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_and_nxv4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.and.nxv4i16( %v) @@ -578,7 +578,7 @@ define signext i16 @vreduce_or_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_or_nxv4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.or.nxv4i16( %v) @@ -589,7 +589,7 @@ define signext i16 @vreduce_xor_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv4i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.xor.nxv4i16( %v) @@ -600,7 +600,7 @@ define signext i32 @vreduce_add_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_add_nxv1i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.add.nxv1i32( %v) @@ -610,7 +610,7 @@ define signext i32 @vwreduce_add_nxv1i16( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv1i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %e = sext %v to @@ -621,7 +621,7 @@ define signext i32 @vwreduce_uadd_nxv1i16( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv1i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %e = zext %v to @@ -633,7 +633,7 @@ define signext i32 @vreduce_umax_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv1i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.umax.nxv1i32( %v) @@ -644,7 +644,7 @@ define signext i32 @vreduce_smax_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv1i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.smax.nxv1i32( %v) @@ -655,7 +655,7 @@ define signext i32 @vreduce_umin_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv1i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.umin.nxv1i32( %v) @@ -666,7 +666,7 @@ define signext i32 @vreduce_smin_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv1i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.smin.nxv1i32( %v) @@ -677,7 +677,7 @@ define signext i32 @vreduce_and_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_and_nxv1i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.and.nxv1i32( %v) @@ -688,7 +688,7 @@ define signext i32 @vreduce_or_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_or_nxv1i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.or.nxv1i32( %v) @@ -699,7 +699,7 @@ define signext i32 @vreduce_xor_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv1i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.xor.nxv1i32( %v) @@ -710,7 +710,7 @@ define signext i32 @vreduce_add_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_add_nxv2i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.add.nxv2i32( %v) @@ -720,7 +720,7 @@ define signext i32 @vwreduce_add_nxv2i16( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv2i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %e = sext %v to @@ -731,7 +731,7 @@ define signext i32 @vwreduce_uadd_nxv2i16( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv2i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %e = zext %v to @@ -743,7 +743,7 @@ define signext i32 @vreduce_umax_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv2i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.umax.nxv2i32( %v) @@ -754,7 +754,7 @@ define signext i32 @vreduce_smax_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv2i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.smax.nxv2i32( %v) @@ -765,7 +765,7 @@ define signext i32 @vreduce_umin_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv2i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.umin.nxv2i32( %v) @@ -776,7 +776,7 @@ define signext i32 @vreduce_smin_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv2i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.smin.nxv2i32( %v) @@ -787,7 +787,7 @@ define signext i32 @vreduce_and_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_and_nxv2i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.and.nxv2i32( %v) @@ -798,7 +798,7 @@ define signext i32 @vreduce_or_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_or_nxv2i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.or.nxv2i32( %v) @@ -809,7 +809,7 @@ define signext i32 @vreduce_xor_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv2i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.xor.nxv2i32( %v) @@ -820,7 +820,7 @@ define signext i32 @vreduce_add_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_add_nxv4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.add.nxv4i32( %v) @@ -830,7 +830,7 @@ define signext i32 @vwreduce_add_nxv4i16( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv4i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %e = sext %v to @@ -841,7 +841,7 @@ define signext i32 @vwreduce_uadd_nxv4i16( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv4i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %e = zext %v to @@ -853,7 +853,7 @@ define signext i32 @vreduce_umax_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.umax.nxv4i32( %v) @@ -864,7 +864,7 @@ define signext i32 @vreduce_smax_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.smax.nxv4i32( %v) @@ -875,7 +875,7 @@ define signext i32 @vreduce_umin_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.umin.nxv4i32( %v) @@ -886,7 +886,7 @@ define signext i32 @vreduce_smin_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.smin.nxv4i32( %v) @@ -897,7 +897,7 @@ define signext i32 @vreduce_and_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_and_nxv4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.and.nxv4i32( %v) @@ -908,7 +908,7 @@ define signext i32 @vreduce_or_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_or_nxv4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.or.nxv4i32( %v) @@ -919,7 +919,7 @@ define signext i32 @vreduce_xor_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.xor.nxv4i32( %v) @@ -930,7 +930,7 @@ define i64 @vreduce_add_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_add_nxv1i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.add.nxv1i64( %v) @@ -940,7 +940,7 @@ define i64 @vwreduce_add_nxv1i32( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv1i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %e = sext %v to @@ -951,7 +951,7 @@ define i64 @vwreduce_uadd_nxv1i32( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv1i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %e = zext %v to @@ -963,7 +963,7 @@ define i64 @vreduce_umax_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv1i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.umax.nxv1i64( %v) @@ -974,7 +974,7 @@ define i64 @vreduce_smax_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv1i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.smax.nxv1i64( %v) @@ -985,7 +985,7 @@ define i64 @vreduce_umin_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv1i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.umin.nxv1i64( %v) @@ -996,7 +996,7 @@ define i64 @vreduce_smin_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv1i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.smin.nxv1i64( %v) @@ -1007,7 +1007,7 @@ define i64 @vreduce_and_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_and_nxv1i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.and.nxv1i64( %v) @@ -1018,7 +1018,7 @@ define i64 @vreduce_or_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_or_nxv1i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.or.nxv1i64( %v) @@ -1029,7 +1029,7 @@ define i64 @vreduce_xor_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv1i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.xor.nxv1i64( %v) @@ -1040,7 +1040,7 @@ define i64 @vreduce_add_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_add_nxv2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.add.nxv2i64( %v) @@ -1050,7 +1050,7 @@ define i64 @vwreduce_add_nxv2i32( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv2i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %e = sext %v to @@ -1061,7 +1061,7 @@ define i64 @vwreduce_uadd_nxv2i32( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv2i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %e = zext %v to @@ -1073,7 +1073,7 @@ define i64 @vreduce_umax_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.umax.nxv2i64( %v) @@ -1084,7 +1084,7 @@ define i64 @vreduce_smax_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.smax.nxv2i64( %v) @@ -1095,7 +1095,7 @@ define i64 @vreduce_umin_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.umin.nxv2i64( %v) @@ -1106,7 +1106,7 @@ define i64 @vreduce_smin_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.smin.nxv2i64( %v) @@ -1117,7 +1117,7 @@ define i64 @vreduce_and_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_and_nxv2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.and.nxv2i64( %v) @@ -1128,7 +1128,7 @@ define i64 @vreduce_or_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_or_nxv2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.or.nxv2i64( %v) @@ -1139,7 +1139,7 @@ define i64 @vreduce_xor_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.xor.nxv2i64( %v) @@ -1150,7 +1150,7 @@ define i64 @vreduce_add_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_add_nxv4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.add.nxv4i64( %v) @@ -1160,7 +1160,7 @@ define i64 @vwreduce_add_nxv4i32( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv4i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %e = sext %v to @@ -1171,7 +1171,7 @@ define i64 @vwreduce_uadd_nxv4i32( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv4i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64( %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %e = zext %v to @@ -1183,7 +1183,7 @@ define i64 @vreduce_umax_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.umax.nxv4i64( %v) @@ -1194,7 +1194,7 @@ define i64 @vreduce_smax_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.smax.nxv4i64( %v) @@ -1205,7 +1205,7 @@ define i64 @vreduce_umin_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.umin.nxv4i64( %v) @@ -1216,7 +1216,7 @@ define i64 @vreduce_smin_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.smin.nxv4i64( %v) @@ -1227,7 +1227,7 @@ define i64 @vreduce_and_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_and_nxv4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.and.nxv4i64( %v) @@ -1238,7 +1238,7 @@ define i64 @vreduce_or_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_or_nxv4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.or.nxv4i64( %v) @@ -1249,7 +1249,7 @@ define i64 @vreduce_xor_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv4i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64( %v) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.xor.nxv4i64( %v) diff --git a/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll --- a/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll +++ b/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll @@ -5,91 +5,91 @@ define void @masked_gather_aligned() { ; GENERIC-LABEL: 'masked_gather_aligned' -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: %V8F64 = call @llvm.masked.gather.nxv8f64.nxv8p0f64( undef, i32 8, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: %V4F64 = call @llvm.masked.gather.nxv4f64.nxv4p0f64( undef, i32 8, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %V2F64 = call @llvm.masked.gather.nxv2f64.nxv2p0f64( undef, i32 8, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %V1F64 = call @llvm.masked.gather.nxv1f64.nxv1p0f64( undef, i32 8, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16384 for instruction: %V16F32 = call @llvm.masked.gather.nxv16f32.nxv16p0f32( undef, i32 4, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: %V8F32 = call @llvm.masked.gather.nxv8f32.nxv8p0f32( undef, i32 4, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: %V4F32 = call @llvm.masked.gather.nxv4f32.nxv4p0f32( undef, i32 4, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %V2F32 = call @llvm.masked.gather.nxv2f32.nxv2p0f32( undef, i32 4, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %V1F32 = call @llvm.masked.gather.nxv1f32.nxv1p0f32( undef, i32 4, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32768 for instruction: %V32F16 = call @llvm.masked.gather.nxv32f16.nxv32p0f16( undef, i32 2, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16384 for instruction: %V16F16 = call @llvm.masked.gather.nxv16f16.nxv16p0f16( undef, i32 2, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: %V8F16 = call @llvm.masked.gather.nxv8f16.nxv8p0f16( undef, i32 2, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: %V4F16 = call @llvm.masked.gather.nxv4f16.nxv4p0f16( undef, i32 2, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %V2F16 = call @llvm.masked.gather.nxv2f16.nxv2p0f16( undef, i32 2, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %V1F16 = call @llvm.masked.gather.nxv1f16.nxv1p0f16( undef, i32 2, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: %V8I64 = call @llvm.masked.gather.nxv8i64.nxv8p0i64( undef, i32 8, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: %V4I64 = call @llvm.masked.gather.nxv4i64.nxv4p0i64( undef, i32 8, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %V2I64 = call @llvm.masked.gather.nxv2i64.nxv2p0i64( undef, i32 8, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %V1I64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 8, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16384 for instruction: %V16I32 = call @llvm.masked.gather.nxv16i32.nxv16p0i32( undef, i32 4, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: %V8I32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 4, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: %V4I32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 4, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %V2I32 = call @llvm.masked.gather.nxv2i32.nxv2p0i32( undef, i32 4, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %V1I32 = call @llvm.masked.gather.nxv1i32.nxv1p0i32( undef, i32 4, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32768 for instruction: %V32I16 = call @llvm.masked.gather.nxv32i16.nxv32p0i16( undef, i32 2, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16384 for instruction: %V16I16 = call @llvm.masked.gather.nxv16i16.nxv16p0i16( undef, i32 2, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: %V8I16 = call @llvm.masked.gather.nxv8i16.nxv8p0i16( undef, i32 2, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: %V4I16 = call @llvm.masked.gather.nxv4i16.nxv4p0i16( undef, i32 2, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %V2I16 = call @llvm.masked.gather.nxv2i16.nxv2p0i16( undef, i32 2, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %V1I16 = call @llvm.masked.gather.nxv1i16.nxv1p0i16( undef, i32 2, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 65536 for instruction: %V64I8 = call @llvm.masked.gather.nxv64i8.nxv64p0i8( undef, i32 1, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32768 for instruction: %V32I8 = call @llvm.masked.gather.nxv32i8.nxv32p0i8( undef, i32 1, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16384 for instruction: %V16I8 = call @llvm.masked.gather.nxv16i8.nxv16p0i8( undef, i32 1, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: %V8I8 = call @llvm.masked.gather.nxv8i8.nxv8p0i8( undef, i32 1, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: %V4I8 = call @llvm.masked.gather.nxv4i8.nxv4p0i8( undef, i32 1, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %V2I8 = call @llvm.masked.gather.nxv2i8.nxv2p0i8( undef, i32 1, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %V1I8 = call @llvm.masked.gather.nxv1i8.nxv1p0i8( undef, i32 1, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: %V8PTR = call @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8( undef, i32 8, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: %V4PTR = call @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8( undef, i32 8, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %V2PTR = call @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8( undef, i32 8, undef, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %V1PTR = call @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8( undef, i32 8, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call @llvm.masked.gather.nxv8f64.nxv8p0f64( undef, i32 8, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call @llvm.masked.gather.nxv4f64.nxv4p0f64( undef, i32 8, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call @llvm.masked.gather.nxv2f64.nxv2p0f64( undef, i32 8, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call @llvm.masked.gather.nxv1f64.nxv1p0f64( undef, i32 8, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call @llvm.masked.gather.nxv16f32.nxv16p0f32( undef, i32 4, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call @llvm.masked.gather.nxv8f32.nxv8p0f32( undef, i32 4, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call @llvm.masked.gather.nxv4f32.nxv4p0f32( undef, i32 4, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call @llvm.masked.gather.nxv2f32.nxv2p0f32( undef, i32 4, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call @llvm.masked.gather.nxv1f32.nxv1p0f32( undef, i32 4, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call @llvm.masked.gather.nxv32f16.nxv32p0f16( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call @llvm.masked.gather.nxv16f16.nxv16p0f16( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call @llvm.masked.gather.nxv8f16.nxv8p0f16( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call @llvm.masked.gather.nxv4f16.nxv4p0f16( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call @llvm.masked.gather.nxv2f16.nxv2p0f16( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call @llvm.masked.gather.nxv1f16.nxv1p0f16( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call @llvm.masked.gather.nxv8i64.nxv8p0i64( undef, i32 8, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call @llvm.masked.gather.nxv4i64.nxv4p0i64( undef, i32 8, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call @llvm.masked.gather.nxv2i64.nxv2p0i64( undef, i32 8, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 8, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call @llvm.masked.gather.nxv16i32.nxv16p0i32( undef, i32 4, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 4, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 4, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call @llvm.masked.gather.nxv2i32.nxv2p0i32( undef, i32 4, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call @llvm.masked.gather.nxv1i32.nxv1p0i32( undef, i32 4, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call @llvm.masked.gather.nxv32i16.nxv32p0i16( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call @llvm.masked.gather.nxv16i16.nxv16p0i16( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call @llvm.masked.gather.nxv8i16.nxv8p0i16( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call @llvm.masked.gather.nxv4i16.nxv4p0i16( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call @llvm.masked.gather.nxv2i16.nxv2p0i16( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call @llvm.masked.gather.nxv1i16.nxv1p0i16( undef, i32 2, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call @llvm.masked.gather.nxv64i8.nxv64p0i8( undef, i32 1, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call @llvm.masked.gather.nxv32i8.nxv32p0i8( undef, i32 1, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call @llvm.masked.gather.nxv16i8.nxv16p0i8( undef, i32 1, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call @llvm.masked.gather.nxv8i8.nxv8p0i8( undef, i32 1, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call @llvm.masked.gather.nxv4i8.nxv4p0i8( undef, i32 1, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call @llvm.masked.gather.nxv2i8.nxv2p0i8( undef, i32 1, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call @llvm.masked.gather.nxv1i8.nxv1p0i8( undef, i32 1, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8( undef, i32 8, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8( undef, i32 8, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8( undef, i32 8, undef, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8( undef, i32 8, undef, undef) ; GENERIC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; MAX256-LABEL: 'masked_gather_aligned' -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F64 = call @llvm.masked.gather.nxv8f64.nxv8p0f64( undef, i32 8, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = call @llvm.masked.gather.nxv4f64.nxv4p0f64( undef, i32 8, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call @llvm.masked.gather.nxv2f64.nxv2p0f64( undef, i32 8, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F64 = call @llvm.masked.gather.nxv1f64.nxv1p0f64( undef, i32 8, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16F32 = call @llvm.masked.gather.nxv16f32.nxv16p0f32( undef, i32 4, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F32 = call @llvm.masked.gather.nxv8f32.nxv8p0f32( undef, i32 4, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F32 = call @llvm.masked.gather.nxv4f32.nxv4p0f32( undef, i32 4, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call @llvm.masked.gather.nxv2f32.nxv2p0f32( undef, i32 4, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F32 = call @llvm.masked.gather.nxv1f32.nxv1p0f32( undef, i32 4, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32F16 = call @llvm.masked.gather.nxv32f16.nxv32p0f16( undef, i32 2, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16F16 = call @llvm.masked.gather.nxv16f16.nxv16p0f16( undef, i32 2, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F16 = call @llvm.masked.gather.nxv8f16.nxv8p0f16( undef, i32 2, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F16 = call @llvm.masked.gather.nxv4f16.nxv4p0f16( undef, i32 2, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F16 = call @llvm.masked.gather.nxv2f16.nxv2p0f16( undef, i32 2, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16 = call @llvm.masked.gather.nxv1f16.nxv1p0f16( undef, i32 2, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call @llvm.masked.gather.nxv8i64.nxv8p0i64( undef, i32 8, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call @llvm.masked.gather.nxv4i64.nxv4p0i64( undef, i32 8, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call @llvm.masked.gather.nxv2i64.nxv2p0i64( undef, i32 8, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 8, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call @llvm.masked.gather.nxv16i32.nxv16p0i32( undef, i32 4, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 4, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 4, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call @llvm.masked.gather.nxv2i32.nxv2p0i32( undef, i32 4, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I32 = call @llvm.masked.gather.nxv1i32.nxv1p0i32( undef, i32 4, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call @llvm.masked.gather.nxv32i16.nxv32p0i16( undef, i32 2, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call @llvm.masked.gather.nxv16i16.nxv16p0i16( undef, i32 2, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call @llvm.masked.gather.nxv8i16.nxv8p0i16( undef, i32 2, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call @llvm.masked.gather.nxv4i16.nxv4p0i16( undef, i32 2, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call @llvm.masked.gather.nxv2i16.nxv2p0i16( undef, i32 2, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I16 = call @llvm.masked.gather.nxv1i16.nxv1p0i16( undef, i32 2, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call @llvm.masked.gather.nxv64i8.nxv64p0i8( undef, i32 1, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call @llvm.masked.gather.nxv32i8.nxv32p0i8( undef, i32 1, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call @llvm.masked.gather.nxv16i8.nxv16p0i8( undef, i32 1, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call @llvm.masked.gather.nxv8i8.nxv8p0i8( undef, i32 1, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call @llvm.masked.gather.nxv4i8.nxv4p0i8( undef, i32 1, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call @llvm.masked.gather.nxv2i8.nxv2p0i8( undef, i32 1, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I8 = call @llvm.masked.gather.nxv1i8.nxv1p0i8( undef, i32 1, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8PTR = call @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8( undef, i32 8, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4PTR = call @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8( undef, i32 8, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2PTR = call @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8( undef, i32 8, undef, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1PTR = call @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8( undef, i32 8, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call @llvm.masked.gather.nxv8f64.nxv8p0f64( undef, i32 8, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call @llvm.masked.gather.nxv4f64.nxv4p0f64( undef, i32 8, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call @llvm.masked.gather.nxv2f64.nxv2p0f64( undef, i32 8, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call @llvm.masked.gather.nxv1f64.nxv1p0f64( undef, i32 8, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call @llvm.masked.gather.nxv16f32.nxv16p0f32( undef, i32 4, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call @llvm.masked.gather.nxv8f32.nxv8p0f32( undef, i32 4, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call @llvm.masked.gather.nxv4f32.nxv4p0f32( undef, i32 4, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call @llvm.masked.gather.nxv2f32.nxv2p0f32( undef, i32 4, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call @llvm.masked.gather.nxv1f32.nxv1p0f32( undef, i32 4, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call @llvm.masked.gather.nxv32f16.nxv32p0f16( undef, i32 2, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call @llvm.masked.gather.nxv16f16.nxv16p0f16( undef, i32 2, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call @llvm.masked.gather.nxv8f16.nxv8p0f16( undef, i32 2, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call @llvm.masked.gather.nxv4f16.nxv4p0f16( undef, i32 2, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call @llvm.masked.gather.nxv2f16.nxv2p0f16( undef, i32 2, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call @llvm.masked.gather.nxv1f16.nxv1p0f16( undef, i32 2, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call @llvm.masked.gather.nxv8i64.nxv8p0i64( undef, i32 8, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call @llvm.masked.gather.nxv4i64.nxv4p0i64( undef, i32 8, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call @llvm.masked.gather.nxv2i64.nxv2p0i64( undef, i32 8, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call @llvm.masked.gather.nxv1i64.nxv1p0i64( undef, i32 8, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call @llvm.masked.gather.nxv16i32.nxv16p0i32( undef, i32 4, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call @llvm.masked.gather.nxv8i32.nxv8p0i32( undef, i32 4, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( undef, i32 4, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call @llvm.masked.gather.nxv2i32.nxv2p0i32( undef, i32 4, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call @llvm.masked.gather.nxv1i32.nxv1p0i32( undef, i32 4, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call @llvm.masked.gather.nxv32i16.nxv32p0i16( undef, i32 2, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call @llvm.masked.gather.nxv16i16.nxv16p0i16( undef, i32 2, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call @llvm.masked.gather.nxv8i16.nxv8p0i16( undef, i32 2, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call @llvm.masked.gather.nxv4i16.nxv4p0i16( undef, i32 2, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call @llvm.masked.gather.nxv2i16.nxv2p0i16( undef, i32 2, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call @llvm.masked.gather.nxv1i16.nxv1p0i16( undef, i32 2, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call @llvm.masked.gather.nxv64i8.nxv64p0i8( undef, i32 1, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call @llvm.masked.gather.nxv32i8.nxv32p0i8( undef, i32 1, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call @llvm.masked.gather.nxv16i8.nxv16p0i8( undef, i32 1, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call @llvm.masked.gather.nxv8i8.nxv8p0i8( undef, i32 1, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call @llvm.masked.gather.nxv4i8.nxv4p0i8( undef, i32 1, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call @llvm.masked.gather.nxv2i8.nxv2p0i8( undef, i32 1, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call @llvm.masked.gather.nxv1i8.nxv1p0i8( undef, i32 1, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8( undef, i32 8, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8( undef, i32 8, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8( undef, i32 8, undef, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8( undef, i32 8, undef, undef) ; MAX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; UNSUPPORTED-LABEL: 'masked_gather_aligned' diff --git a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll --- a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll +++ b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll @@ -5,91 +5,91 @@ define void @masked_scatter_aligned() { ; GENERIC-LABEL: 'masked_scatter_aligned' -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( undef, undef, i32 8, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( undef, undef, i32 8, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( undef, undef, i32 8, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64( undef, undef, i32 8, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16384 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0f32( undef, undef, i32 4, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( undef, undef, i32 4, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( undef, undef, i32 4, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32( undef, undef, i32 4, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0f32( undef, undef, i32 4, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32768 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0f16( undef, undef, i32 2, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16384 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0f16( undef, undef, i32 2, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0f16( undef, undef, i32 2, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0f16( undef, undef, i32 2, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0f16( undef, undef, i32 2, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0f16( undef, undef, i32 2, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( undef, undef, i32 8, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0i64( undef, undef, i32 8, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0i64( undef, undef, i32 8, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 8, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16384 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0i32( undef, undef, i32 4, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 4, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 4, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0i32( undef, undef, i32 4, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0i32( undef, undef, i32 4, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32768 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0i16( undef, undef, i32 2, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16384 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16( undef, undef, i32 2, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( undef, undef, i32 2, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( undef, undef, i32 2, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0i16( undef, undef, i32 2, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0i16( undef, undef, i32 2, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 65536 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0i8( undef, undef, i32 1, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 32768 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0i8( undef, undef, i32 1, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 16384 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0i8( undef, undef, i32 1, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0i8( undef, undef, i32 1, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0i8( undef, undef, i32 1, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0i8( undef, undef, i32 1, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0i8( undef, undef, i32 1, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8( undef, undef, i32 8, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8( undef, undef, i32 8, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8( undef, undef, i32 8, undef) -; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8( undef, undef, i32 8, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( undef, undef, i32 8, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( undef, undef, i32 8, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( undef, undef, i32 8, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64( undef, undef, i32 8, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0f32( undef, undef, i32 4, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( undef, undef, i32 4, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( undef, undef, i32 4, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32( undef, undef, i32 4, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0f32( undef, undef, i32 4, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0f16( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0f16( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0f16( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0f16( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0f16( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0f16( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( undef, undef, i32 8, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0i64( undef, undef, i32 8, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0i64( undef, undef, i32 8, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 8, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0i32( undef, undef, i32 4, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 4, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 4, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0i32( undef, undef, i32 4, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0i32( undef, undef, i32 4, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0i16( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0i16( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0i16( undef, undef, i32 2, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0i8( undef, undef, i32 1, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0i8( undef, undef, i32 1, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0i8( undef, undef, i32 1, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0i8( undef, undef, i32 1, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0i8( undef, undef, i32 1, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0i8( undef, undef, i32 1, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0i8( undef, undef, i32 1, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8( undef, undef, i32 8, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8( undef, undef, i32 8, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8( undef, undef, i32 8, undef) +; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8( undef, undef, i32 8, undef) ; GENERIC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; MAX256-LABEL: 'masked_scatter_aligned' -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( undef, undef, i32 8, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( undef, undef, i32 8, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( undef, undef, i32 8, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64( undef, undef, i32 8, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0f32( undef, undef, i32 4, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( undef, undef, i32 4, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( undef, undef, i32 4, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32( undef, undef, i32 4, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0f32( undef, undef, i32 4, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0f16( undef, undef, i32 2, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0f16( undef, undef, i32 2, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0f16( undef, undef, i32 2, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0f16( undef, undef, i32 2, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0f16( undef, undef, i32 2, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0f16( undef, undef, i32 2, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( undef, undef, i32 8, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0i64( undef, undef, i32 8, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0i64( undef, undef, i32 8, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 8, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0i32( undef, undef, i32 4, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 4, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 4, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0i32( undef, undef, i32 4, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0i32( undef, undef, i32 4, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0i16( undef, undef, i32 2, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16( undef, undef, i32 2, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( undef, undef, i32 2, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( undef, undef, i32 2, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0i16( undef, undef, i32 2, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0i16( undef, undef, i32 2, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0i8( undef, undef, i32 1, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0i8( undef, undef, i32 1, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0i8( undef, undef, i32 1, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0i8( undef, undef, i32 1, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0i8( undef, undef, i32 1, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0i8( undef, undef, i32 1, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0i8( undef, undef, i32 1, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8( undef, undef, i32 8, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8( undef, undef, i32 8, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8( undef, undef, i32 8, undef) -; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8( undef, undef, i32 8, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0f64( undef, undef, i32 8, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64( undef, undef, i32 8, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( undef, undef, i32 8, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64( undef, undef, i32 8, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0f32( undef, undef, i32 4, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32( undef, undef, i32 4, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( undef, undef, i32 4, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32( undef, undef, i32 4, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0f32( undef, undef, i32 4, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0f16( undef, undef, i32 2, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0f16( undef, undef, i32 2, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0f16( undef, undef, i32 2, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0f16( undef, undef, i32 2, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0f16( undef, undef, i32 2, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0f16( undef, undef, i32 2, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0i64( undef, undef, i32 8, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0i64( undef, undef, i32 8, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0i64( undef, undef, i32 8, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64( undef, undef, i32 8, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0i32( undef, undef, i32 4, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32( undef, undef, i32 4, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( undef, undef, i32 4, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0i32( undef, undef, i32 4, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0i32( undef, undef, i32 4, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0i16( undef, undef, i32 2, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16( undef, undef, i32 2, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16( undef, undef, i32 2, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( undef, undef, i32 2, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0i16( undef, undef, i32 2, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0i16( undef, undef, i32 2, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0i8( undef, undef, i32 1, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0i8( undef, undef, i32 1, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0i8( undef, undef, i32 1, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0i8( undef, undef, i32 1, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0i8( undef, undef, i32 1, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0i8( undef, undef, i32 1, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0i8( undef, undef, i32 1, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8( undef, undef, i32 8, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8( undef, undef, i32 8, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8( undef, undef, i32 8, undef) +; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8( undef, undef, i32 8, undef) ; MAX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; UNSUPPORTED-LABEL: 'masked_scatter_aligned' diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -19,7 +19,10 @@ ; RV32-NEXT: [[A1:%.*]] = bitcast double* [[A:%.*]] to i8* ; RV32-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8* ; RV32-NEXT: [[B6:%.*]] = bitcast double* [[B:%.*]] to i8* -; RV32-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; RV32-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 12, i64 [[TMP0]]) +; RV32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 625, [[TMP1]] +; RV32-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; RV32: vector.memcheck: ; RV32-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[A]], i64 9985 ; RV32-NEXT: [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8* @@ -36,42 +39,55 @@ ; RV32-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]] ; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; RV32: vector.ph: +; RV32-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP2]] +; RV32-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] +; RV32-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 +; RV32-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv1i64() +; RV32-NEXT: [[TMP4:%.*]] = add [[TMP3]], zeroinitializer +; RV32-NEXT: [[TMP5:%.*]] = mul [[TMP4]], shufflevector ( insertelement ( poison, i64 16, i32 0), poison, zeroinitializer) +; RV32-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] +; RV32-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP7:%.*]] = mul i64 16, [[TMP6]] +; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i32 0 +; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; RV32-NEXT: br label [[VECTOR_BODY:%.*]] ; RV32: vector.body: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV32-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV32-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], <4 x i64> [[VEC_IND]] -; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP0]], i32 4, <4 x i1> , <4 x i32> undef), !alias.scope !0 -; RV32-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[WIDE_MASKED_GATHER]], -; RV32-NEXT: [[TMP2:%.*]] = shl nuw nsw <4 x i64> [[VEC_IND]], -; RV32-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[B]], <4 x i64> [[TMP2]] -; RV32-NEXT: [[WIDE_MASKED_GATHER12:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[TMP3]], i32 8, <4 x i1> [[TMP1]], <4 x double> undef), !alias.scope !3 -; RV32-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[WIDE_MASKED_GATHER]] to <4 x double> -; RV32-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[WIDE_MASKED_GATHER12]], [[TMP4]] -; RV32-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[A]], <4 x i64> [[VEC_IND]] -; RV32-NEXT: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> [[TMP5]], <4 x double*> [[TMP6]], i32 8, <4 x i1> [[TMP1]]), !alias.scope !5, !noalias !7 -; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; RV32-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; RV32-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 624 -; RV32-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; RV32-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; RV32-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], [[VEC_IND]] +; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i32.nxv1p0i32( [[TMP8]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), undef), !alias.scope !0 +; RV32-NEXT: [[TMP9:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i32 0), poison, zeroinitializer) +; RV32-NEXT: [[TMP10:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) +; RV32-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[B]], [[TMP10]] +; RV32-NEXT: [[WIDE_MASKED_GATHER12:%.*]] = call @llvm.masked.gather.nxv1f64.nxv1p0f64( [[TMP11]], i32 8, [[TMP9]], undef), !alias.scope !3 +; RV32-NEXT: [[TMP12:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to +; RV32-NEXT: [[TMP13:%.*]] = fadd [[WIDE_MASKED_GATHER12]], [[TMP12]] +; RV32-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, double* [[A]], [[VEC_IND]] +; RV32-NEXT: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64( [[TMP13]], [[TMP14]], i32 8, [[TMP9]]), !alias.scope !5, !noalias !7 +; RV32-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; RV32-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; RV32-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV32: middle.block: -; RV32-NEXT: [[CMP_N:%.*]] = icmp eq i64 625, 624 +; RV32-NEXT: [[CMP_N:%.*]] = icmp eq i64 625, [[N_VEC]] ; RV32-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; RV32: scalar.ph: -; RV32-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; RV32-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; RV32-NEXT: br label [[FOR_BODY:%.*]] ; RV32: for.body: ; RV32-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; RV32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]] -; RV32-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; RV32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 100 +; RV32-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; RV32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP17]], 100 ; RV32-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; RV32: if.then: -; RV32-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 -; RV32-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP9]] -; RV32-NEXT: [[TMP10:%.*]] = load double, double* [[ARRAYIDX3]], align 8 -; RV32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP8]] to double -; RV32-NEXT: [[ADD:%.*]] = fadd double [[TMP10]], [[CONV]] +; RV32-NEXT: [[TMP18:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 +; RV32-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP18]] +; RV32-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX3]], align 8 +; RV32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP17]] to double +; RV32-NEXT: [[ADD:%.*]] = fadd double [[TMP19]], [[CONV]] ; RV32-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV]] ; RV32-NEXT: store double [[ADD]], double* [[ARRAYIDX7]], align 8 ; RV32-NEXT: br label [[FOR_INC]] @@ -87,7 +103,10 @@ ; RV64-NEXT: [[A1:%.*]] = bitcast double* [[A:%.*]] to i8* ; RV64-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8* ; RV64-NEXT: [[B6:%.*]] = bitcast double* [[B:%.*]] to i8* -; RV64-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 12, i64 [[TMP0]]) +; RV64-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 625, [[TMP1]] +; RV64-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; RV64: vector.memcheck: ; RV64-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[A]], i64 9985 ; RV64-NEXT: [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8* @@ -104,42 +123,55 @@ ; RV64-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]] ; RV64-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; RV64: vector.ph: +; RV64-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP2]] +; RV64-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] +; RV64-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 +; RV64-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv1i64() +; RV64-NEXT: [[TMP4:%.*]] = add [[TMP3]], zeroinitializer +; RV64-NEXT: [[TMP5:%.*]] = mul [[TMP4]], shufflevector ( insertelement ( poison, i64 16, i32 0), poison, zeroinitializer) +; RV64-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] +; RV64-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP7:%.*]] = mul i64 16, [[TMP6]] +; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i32 0 +; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; RV64-NEXT: br label [[VECTOR_BODY:%.*]] ; RV64: vector.body: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV64-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV64-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], <4 x i64> [[VEC_IND]] -; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP0]], i32 4, <4 x i1> , <4 x i32> undef), !alias.scope !0 -; RV64-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[WIDE_MASKED_GATHER]], -; RV64-NEXT: [[TMP2:%.*]] = shl nuw nsw <4 x i64> [[VEC_IND]], -; RV64-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[B]], <4 x i64> [[TMP2]] -; RV64-NEXT: [[WIDE_MASKED_GATHER12:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[TMP3]], i32 8, <4 x i1> [[TMP1]], <4 x double> undef), !alias.scope !3 -; RV64-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[WIDE_MASKED_GATHER]] to <4 x double> -; RV64-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[WIDE_MASKED_GATHER12]], [[TMP4]] -; RV64-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[A]], <4 x i64> [[VEC_IND]] -; RV64-NEXT: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> [[TMP5]], <4 x double*> [[TMP6]], i32 8, <4 x i1> [[TMP1]]), !alias.scope !5, !noalias !7 -; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; RV64-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; RV64-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 624 -; RV64-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; RV64-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; RV64-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], [[VEC_IND]] +; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i32.nxv1p0i32( [[TMP8]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), undef), !alias.scope !0 +; RV64-NEXT: [[TMP9:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i32 0), poison, zeroinitializer) +; RV64-NEXT: [[TMP10:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) +; RV64-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[B]], [[TMP10]] +; RV64-NEXT: [[WIDE_MASKED_GATHER12:%.*]] = call @llvm.masked.gather.nxv1f64.nxv1p0f64( [[TMP11]], i32 8, [[TMP9]], undef), !alias.scope !3 +; RV64-NEXT: [[TMP12:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to +; RV64-NEXT: [[TMP13:%.*]] = fadd [[WIDE_MASKED_GATHER12]], [[TMP12]] +; RV64-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, double* [[A]], [[VEC_IND]] +; RV64-NEXT: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64( [[TMP13]], [[TMP14]], i32 8, [[TMP9]]), !alias.scope !5, !noalias !7 +; RV64-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; RV64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; RV64-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV64: middle.block: -; RV64-NEXT: [[CMP_N:%.*]] = icmp eq i64 625, 624 +; RV64-NEXT: [[CMP_N:%.*]] = icmp eq i64 625, [[N_VEC]] ; RV64-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; RV64: scalar.ph: -; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; RV64-NEXT: br label [[FOR_BODY:%.*]] ; RV64: for.body: ; RV64-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; RV64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]] -; RV64-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; RV64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 100 +; RV64-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; RV64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP17]], 100 ; RV64-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; RV64: if.then: -; RV64-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 -; RV64-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP9]] -; RV64-NEXT: [[TMP10:%.*]] = load double, double* [[ARRAYIDX3]], align 8 -; RV64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP8]] to double -; RV64-NEXT: [[ADD:%.*]] = fadd double [[TMP10]], [[CONV]] +; RV64-NEXT: [[TMP18:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 +; RV64-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP18]] +; RV64-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX3]], align 8 +; RV64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP17]] to double +; RV64-NEXT: [[ADD:%.*]] = fadd double [[TMP19]], [[CONV]] ; RV64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV]] ; RV64-NEXT: store double [[ADD]], double* [[ARRAYIDX7]], align 8 ; RV64-NEXT: br label [[FOR_INC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll @@ -320,50 +320,75 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) { ; VLENUNK-LABEL: @indexed_store( ; VLENUNK-NEXT: entry: +; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VLENUNK: vector.ph: +; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i32 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] +; VLENUNK: vector.body: +; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]] +; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 +; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer)) +; VLENUNK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; VLENUNK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VLENUNK: middle.block: +; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; VLENUNK: scalar.ph: +; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; VLENUNK-NEXT: br label [[FOR_BODY:%.*]] ; VLENUNK: for.body: -; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]] +; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] ; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 -; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]] -; VLENUNK-NEXT: store i64 [[V:%.*]], ptr [[AADDR]], align 8 +; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]] +; VLENUNK-NEXT: store i64 [[V]], ptr [[AADDR]], align 8 ; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VLENUNK: for.end: ; VLENUNK-NEXT: ret void ; ; VLEN128-LABEL: @indexed_store( ; VLEN128-NEXT: entry: -; VLEN128-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: -; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0 -; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; VLEN128-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0 -; VLEN128-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer +; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i32 0 +; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 -; VLEN128-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP0]] -; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; VLEN128-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <2 x i64> [[WIDE_LOAD]] -; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], <2 x i64> [[WIDE_LOAD1]] -; VLEN128-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT]], <2 x ptr> [[TMP6]], i32 8, <2 x i1> ) -; VLEN128-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT3]], <2 x ptr> [[TMP7]], i32 8, <2 x i1> ) -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VLEN128-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; VLEN128-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]] +; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 +; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 +; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLEN128-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer)) +; VLEN128-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; VLEN128-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VLEN128: middle.block: -; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; VLEN128: scalar.ph: -; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; VLEN128-NEXT: br label [[FOR_BODY:%.*]] ; VLEN128: for.body: ; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -397,56 +422,82 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) { ; VLENUNK-LABEL: @indexed_load( ; VLENUNK-NEXT: entry: +; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VLENUNK: vector.ph: +; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] +; VLENUNK: vector.body: +; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VLENUNK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]] +; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 +; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), undef) +; VLENUNK-NEXT: [[TMP6]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; VLENUNK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VLENUNK: middle.block: +; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64( [[TMP6]]) +; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; VLENUNK: scalar.ph: +; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VLENUNK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; VLENUNK-NEXT: br label [[FOR_BODY:%.*]] ; VLENUNK: for.body: -; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; VLENUNK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ] -; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]] +; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; VLENUNK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ] +; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] ; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 -; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]] +; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]] ; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8 ; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; VLENUNK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]] ; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VLENUNK: for.end: -; VLENUNK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ] +; VLENUNK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; VLENUNK-NEXT: ret i64 [[SUM_NEXT_LCSSA]] ; ; VLEN128-LABEL: @indexed_load( ; VLEN128-NEXT: entry: -; VLEN128-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: +; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 -; VLEN128-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP0]] -; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; VLEN128-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <2 x i64> [[WIDE_LOAD]] -; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], <2 x i64> [[WIDE_LOAD2]] -; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> , <2 x i64> undef) -; VLEN128-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[TMP7]], i32 8, <2 x i1> , <2 x i64> undef) -; VLEN128-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; VLEN128-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI1]], [[WIDE_MASKED_GATHER3]] -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VLEN128-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]] +; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 +; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 +; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), undef) +; VLEN128-NEXT: [[TMP6]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; VLEN128-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VLEN128: middle.block: -; VLEN128-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP9]], [[TMP8]] -; VLEN128-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) -; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; VLEN128-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64( [[TMP6]]) +; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; VLEN128: scalar.ph: -; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VLEN128-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VLEN128-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; VLEN128-NEXT: br label [[FOR_BODY:%.*]] ; VLEN128: for.body: ; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -460,7 +511,7 @@ ; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 ; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VLEN128: for.end: -; VLEN128-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; VLEN128-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; VLEN128-NEXT: ret i64 [[SUM_NEXT_LCSSA]] ; entry: @@ -504,7 +555,7 @@ ; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; VLENUNK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -517,7 +568,7 @@ ; VLENUNK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VLENUNK: for.end: ; VLENUNK-NEXT: ret void ; @@ -596,7 +647,7 @@ ; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; VLENUNK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -609,7 +660,7 @@ ; VLENUNK-NEXT: store ptr [[V]], ptr [[ARRAYIDX]], align 8 ; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; VLENUNK: for.end: ; VLENUNK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll @@ -74,16 +74,46 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) { ; CHECK-LABEL: @indexed_store( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]] +; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv1i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 -; CHECK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]] -; CHECK-NEXT: store i64 [[V:%.*]], ptr [[AADDR]], align 8 +; CHECK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]] +; CHECK-NEXT: store i64 [[V]], ptr [[AADDR]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -107,20 +137,53 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) { ; CHECK-LABEL: @indexed_load( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]] +; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv1i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]], undef) +; CHECK-NEXT: [[TMP9]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64( [[TMP10]]) +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 -; CHECK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]] +; CHECK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]] ; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]] ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]] ; entry: @@ -168,7 +231,7 @@ ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -180,7 +243,7 @@ ; CHECK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -226,7 +289,7 @@ ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -239,7 +302,7 @@ ; CHECK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -338,21 +338,60 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) { ; SCALABLE-LABEL: @conditional_uniform_load( ; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; SCALABLE: vector.ph: +; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv1i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = add [[TMP2]], zeroinitializer +; SCALABLE-NEXT: [[TMP4:%.*]] = mul [[TMP3]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP4]] +; SCALABLE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP5]] +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i32 0 +; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i32 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] +; SCALABLE: vector.body: +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP8:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i32 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], i32 8, [[TMP8]], undef) +; SCALABLE-NEXT: [[TMP9:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP8]], [[WIDE_MASKED_GATHER]], zeroinitializer +; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] +; SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 +; SCALABLE-NEXT: store [[PREDPHI]], ptr [[TMP11]], align 8 +; SCALABLE-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; SCALABLE: middle.block: +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; SCALABLE: for.body: -; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] ; SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 ; SCALABLE-NEXT: br i1 [[CMP]], label [[DO_LOAD:%.*]], label [[LATCH]] ; SCALABLE: do_load: -; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 8 +; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8 ; SCALABLE-NEXT: br label [[LATCH]] ; SCALABLE: latch: ; SCALABLE-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ [[V]], [[DO_LOAD]] ] -; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] +; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; SCALABLE-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: ret void ; @@ -414,21 +453,66 @@ ; ; TF-SCALABLE-LABEL: @conditional_uniform_load( ; TF-SCALABLE-NEXT: entry: +; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]] +; TF-SCALABLE-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; TF-SCALABLE: vector.ph: +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]] +; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv1i64() +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i32 0 +; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i32 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] +; TF-SCALABLE: vector.body: +; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP10]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i32 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], zeroinitializer +; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], i32 8, [[TMP12]], undef) +; TF-SCALABLE-NEXT: [[TMP13:%.*]] = xor [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP14:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], zeroinitializer +; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP12]], [[WIDE_MASKED_GATHER]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] +; TF-SCALABLE-NEXT: [[TMP16:%.*]] = or [[TMP12]], [[TMP14]] +; TF-SCALABLE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP15]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[PREDPHI]], ptr [[TMP17]], i32 8, [[TMP16]]) +; TF-SCALABLE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP18]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; TF-SCALABLE-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; TF-SCALABLE: middle.block: +; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-SCALABLE: scalar.ph: +; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; TF-SCALABLE: for.body: -; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] ; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 ; TF-SCALABLE-NEXT: br i1 [[CMP]], label [[DO_LOAD:%.*]], label [[LATCH]] ; TF-SCALABLE: do_load: -; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 8 +; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8 ; TF-SCALABLE-NEXT: br label [[LATCH]] ; TF-SCALABLE: latch: ; TF-SCALABLE-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ [[V]], [[DO_LOAD]] ] -; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] +; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-SCALABLE-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: ret void ; @@ -523,7 +607,7 @@ ; SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -537,7 +621,7 @@ ; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: ret void ; @@ -608,7 +692,7 @@ ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -621,7 +705,7 @@ ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: ret void ; @@ -698,7 +782,7 @@ ; SCALABLE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -712,7 +796,7 @@ ; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: ret void ; @@ -783,7 +867,7 @@ ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -796,7 +880,7 @@ ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: ret void ; @@ -853,15 +937,53 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) { ; SCALABLE-LABEL: @uniform_store_of_loop_varying( ; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; SCALABLE: vector.ph: +; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv1i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = add [[TMP2]], zeroinitializer +; SCALABLE-NEXT: [[TMP4:%.*]] = mul [[TMP3]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP4]] +; SCALABLE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP5]] +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i32 0 +; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i32 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[V:%.*]], i32 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] +; SCALABLE: vector.body: +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[VEC_IND]], [[BROADCAST_SPLAT]], i32 8, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer)) +; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] +; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT2]], ptr [[TMP9]], align 8 +; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; SCALABLE: middle.block: +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; SCALABLE: for.body: -; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; SCALABLE-NEXT: store i64 [[IV]], ptr [[B:%.*]], align 8 -; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] -; SCALABLE-NEXT: store i64 [[V:%.*]], ptr [[ARRAYIDX]], align 8 +; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: store i64 [[IV]], ptr [[B]], align 8 +; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: ret void ; @@ -912,15 +1034,56 @@ ; ; TF-SCALABLE-LABEL: @uniform_store_of_loop_varying( ; TF-SCALABLE-NEXT: entry: +; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]] +; TF-SCALABLE-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; TF-SCALABLE: vector.ph: +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]] +; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv1i64() +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i32 0 +; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i32 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[V:%.*]], i32 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] +; TF-SCALABLE: vector.body: +; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP10]], i64 1024) +; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[VEC_IND]], [[BROADCAST_SPLAT]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] +; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT2]], ptr [[TMP12]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; TF-SCALABLE: middle.block: +; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-SCALABLE: scalar.ph: +; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; TF-SCALABLE: for.body: -; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; TF-SCALABLE-NEXT: store i64 [[IV]], ptr [[B:%.*]], align 8 -; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] -; TF-SCALABLE-NEXT: store i64 [[V:%.*]], ptr [[ARRAYIDX]], align 8 +; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; TF-SCALABLE-NEXT: store i64 [[IV]], ptr [[B]], align 8 +; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: ret void ; @@ -979,20 +1142,59 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) { ; SCALABLE-LABEL: @conditional_uniform_store( ; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; SCALABLE: vector.ph: +; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv1i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = add [[TMP2]], zeroinitializer +; SCALABLE-NEXT: [[TMP4:%.*]] = mul [[TMP3]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP4]] +; SCALABLE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP5]] +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i32 0 +; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i32 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i32 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] +; SCALABLE: vector.body: +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP8:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i32 0), poison, zeroinitializer) +; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP8]]) +; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] +; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP10]], align 8 +; SCALABLE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; SCALABLE: middle.block: +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; SCALABLE: for.body: -; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] ; SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 ; SCALABLE-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]] ; SCALABLE: do_store: -; SCALABLE-NEXT: store i64 [[V:%.*]], ptr [[B:%.*]], align 8 +; SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8 ; SCALABLE-NEXT: br label [[LATCH]] ; SCALABLE: latch: -; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] +; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: ret void ; @@ -1053,20 +1255,66 @@ ; ; TF-SCALABLE-LABEL: @conditional_uniform_store( ; TF-SCALABLE-NEXT: entry: +; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]] +; TF-SCALABLE-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; TF-SCALABLE: vector.ph: +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]] +; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv1i64() +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i32 0 +; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i32 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i32 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] +; TF-SCALABLE: vector.body: +; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP10]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i32 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], zeroinitializer +; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP12]]) +; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] +; TF-SCALABLE-NEXT: [[TMP14:%.*]] = xor [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP14]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP16:%.*]] = or [[TMP12]], [[TMP15]] +; TF-SCALABLE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP17]], i32 8, [[TMP16]]) +; TF-SCALABLE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP18]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; TF-SCALABLE-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; TF-SCALABLE: middle.block: +; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-SCALABLE: scalar.ph: +; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; TF-SCALABLE: for.body: -; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] ; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 ; TF-SCALABLE-NEXT: br i1 [[CMP]], label [[DO_STORE:%.*]], label [[LATCH]] ; TF-SCALABLE: do_store: -; TF-SCALABLE-NEXT: store i64 [[V:%.*]], ptr [[B:%.*]], align 8 +; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8 ; TF-SCALABLE-NEXT: br label [[LATCH]] ; TF-SCALABLE: latch: -; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] +; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: ret void ; @@ -1159,7 +1407,7 @@ ; SCALABLE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1173,7 +1421,7 @@ ; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: ret void ; @@ -1244,7 +1492,7 @@ ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -1257,7 +1505,7 @@ ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: ret void ;