Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1023,24 +1023,26 @@ return Cost; } - InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, - const Value *Ptr, bool VariableMask, - Align Alignment, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) { - auto *VT = cast(DataTy); + InstructionCost getMaskedMemOpCost(unsigned Opcode, Type *DataTy, + Align Alignment, bool VariableMask, + bool IsGatherScatter, + TTI::TargetCostKind CostKind) { // Assume the target does not have support for gather/scatter operations + auto *VT = cast(DataTy); // and provide a rough estimate. // - // First, compute the cost of extracting the individual addresses and the - // individual memory operations. + // First, compute the cost of the individual memory operations. + InstructionCost AddrExtractCost = + !IsGatherScatter + ? 0 + : getVectorInstrCost(Instruction::ExtractElement, + FixedVectorType::get( + PointerType::get(VT->getElementType(), 0), + VT->getNumElements()), + -1); InstructionCost LoadCost = VT->getNumElements() * - (getVectorInstrCost( - Instruction::ExtractElement, - FixedVectorType::get(PointerType::get(VT->getElementType(), 0), - VT->getNumElements()), - -1) + + (AddrExtractCost + getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind)); // Next, compute the cost of packing the result in a vector. @@ -1069,6 +1071,21 @@ return LoadCost + PackingCost + ConditionalCost; } + InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, + Align Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind) { + return getMaskedMemOpCost(Opcode, DataTy, Alignment, true, false, CostKind); + } + + InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + const Value *Ptr, bool VariableMask, + Align Alignment, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) { + return getMaskedMemOpCost(Opcode, DataTy, Alignment, VariableMask, true, + CostKind); + } + InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -130,6 +130,10 @@ unsigned getMaxInterleaveFactor(unsigned VF); + InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + Align Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind); + InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1036,6 +1036,16 @@ return Options; } +InstructionCost +AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + Align Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind) { + if (!isa(Src)) + return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind); + return 2; +} + InstructionCost AArch64TTIImpl::getGatherScatterOpCost( unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) { Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1438,12 +1438,8 @@ if (Opcode == Instruction::Store && isLegalMaskedStore(Src, Alignment)) return ST->getMVEVectorCostFactor(CostKind); } - if (!isa(Src)) - return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, - CostKind); - // Scalar cost, which is currently very high due to the efficiency of the - // generated code. - return cast(Src)->getNumElements() * 8; + return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind); } InstructionCost ARMTTIImpl::getInterleavedMemoryOpCost( Index: llvm/test/Analysis/CostModel/ARM/cast_ldst.ll =================================================================== --- llvm/test/Analysis/CostModel/ARM/cast_ldst.ll +++ llvm/test/Analysis/CostModel/ARM/cast_ldst.ll @@ -1783,15 +1783,15 @@ define i32 @maskedload_extends() { ; CHECK-NEON-RECIP-LABEL: 'maskedload_extends' -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32> @@ -1887,15 +1887,15 @@ ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_extends' -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32> @@ -1939,15 +1939,15 @@ ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-RECIP-LABEL: 'maskedload_extends' -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32> @@ -1991,15 +1991,15 @@ ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-RECIP-LABEL: 'maskedload_extends' -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32> @@ -2043,14 +2043,14 @@ ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-SIZE-LABEL: 'maskedload_extends' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> @@ -2095,15 +2095,15 @@ ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-SIZE-LABEL: 'maskedload_extends' -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32> @@ -2147,15 +2147,15 @@ ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'maskedload_extends' -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32> @@ -2199,15 +2199,15 @@ ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'maskedload_extends' -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef) ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32> @@ -2328,26 +2328,26 @@ ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-RECIP-LABEL: 'maskedstore_trunc' @@ -2414,26 +2414,26 @@ ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-RECIP-LABEL: 'maskedstore_trunc' @@ -2457,26 +2457,26 @@ ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-RECIP-LABEL: 'maskedstore_trunc' @@ -2500,26 +2500,26 @@ ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-SIZE-LABEL: 'maskedstore_trunc' @@ -2543,9 +2543,9 @@ ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) @@ -2555,13 +2555,13 @@ ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; @@ -2586,26 +2586,26 @@ ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'maskedstore_trunc' @@ -2629,26 +2629,26 @@ ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'maskedstore_trunc' @@ -2672,26 +2672,26 @@ ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef) ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %v2816 = trunc <2 x i16> undef to <2 x i8> @@ -2743,13 +2743,13 @@ define i32 @maskedload_fpextends() { ; CHECK-NEON-RECIP-LABEL: 'maskedload_fpextends' -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float> @@ -2766,13 +2766,13 @@ ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-RECIP-LABEL: 'maskedload_fpextends' -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float> @@ -2789,13 +2789,13 @@ ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_fpextends' -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float> @@ -2812,13 +2812,13 @@ ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-RECIP-LABEL: 'maskedload_fpextends' -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float> @@ -2835,13 +2835,13 @@ ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-RECIP-LABEL: 'maskedload_fpextends' -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float> @@ -2858,13 +2858,13 @@ ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-SIZE-LABEL: 'maskedload_fpextends' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float> @@ -2881,13 +2881,13 @@ ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-SIZE-LABEL: 'maskedload_fpextends' -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float> @@ -2904,13 +2904,13 @@ ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'maskedload_fpextends' -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float> @@ -2927,13 +2927,13 @@ ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'maskedload_fpextends' -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef) ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float> @@ -2985,14 +2985,14 @@ ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-RECIP-LABEL: 'maskedload_fptrunc' @@ -3004,13 +3004,13 @@ ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -3023,14 +3023,14 @@ ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-RECIP-LABEL: 'maskedload_fptrunc' @@ -3042,14 +3042,14 @@ ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-RECIP-LABEL: 'maskedload_fptrunc' @@ -3061,14 +3061,14 @@ ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-SIZE-LABEL: 'maskedload_fptrunc' @@ -3080,13 +3080,13 @@ ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float> -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; @@ -3099,14 +3099,14 @@ ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'maskedload_fptrunc' @@ -3118,14 +3118,14 @@ ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'maskedload_fptrunc' @@ -3137,14 +3137,14 @@ ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef) +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef) ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %v21632 = fptrunc <2 x float> undef to <2 x half> Index: llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll @@ -0,0 +1,92 @@ +; REQUIRES: asserts +; RUN: opt -loop-vectorize -force-vector-interleave=1 -S -debug < %s 2>%t | FileCheck %s +; RUN: cat %t | FileCheck %s --check-prefix=CHECK-COST + +target triple = "aarch64-unknown-linux-gnu" + +; CHECK-COST: Checking a loop in "fixed_width" +; CHECK-COST: Found an estimated cost of 11 for VF 2 For instruction: store i32 2, i32* %arrayidx1, align 4 +; CHECK-COST: Found an estimated cost of 25 for VF 4 For instruction: store i32 2, i32* %arrayidx1, align 4 +; CHECK-COST: Selecting VF: 1. + +; We should decide this loop is not worth vectorising using fixed width vectors +define void @fixed_width(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) #0 { +; CHECK-LABEL: @fixed_width( +; CHECK-NOT: vector.body +entry: + %cmp6 = icmp sgt i64 %n, 0 + br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.inc + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.inc + %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.07 + %0 = load i32, i32* %arrayidx, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %for.inc, label %if.then + +if.then: ; preds = %for.body + %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %i.07 + store i32 2, i32* %arrayidx1, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %inc = add nuw nsw i64 %i.07, 1 + %exitcond.not = icmp eq i64 %inc, %n + br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body +} + + +; CHECK-COST: Checking a loop in "scalable" +; CHECK-COST: Found an estimated cost of 2 for VF vscale x 4 For instruction: store i32 2, i32* %arrayidx1, align 4 + +define void @scalable(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) #0 { +; CHECK-LABEL: @scalable( +; CHECK: vector.body +; CHECK: call void @llvm.masked.store.nxv4i32.p0nxv4i32 +entry: + %cmp6 = icmp sgt i64 %n, 0 + br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.inc + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.inc + %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.07 + %0 = load i32, i32* %arrayidx, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %for.inc, label %if.then + +if.then: ; preds = %for.body + %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %i.07 + store i32 2, i32* %arrayidx1, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %inc = add nuw nsw i64 %i.07, 1 + %exitcond.not = icmp eq i64 %inc, %n + br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !0 +} + +attributes #0 = { "target-features"="+neon,+sve" } + +!0 = distinct !{!0, !1, !2, !3, !4} +!1 = !{!"llvm.loop.mustprogress"} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} +!4 = !{!"llvm.loop.vectorize.enable", i1 true}