diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3229,49 +3229,128 @@ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); - // Handle non-power-of-two vectors such as <3 x float> and <48 x i16> - if (auto *VTy = dyn_cast(Src)) { - const unsigned NumElem = VTy->getNumElements(); - if (!isPowerOf2_32(NumElem)) { - // Factorize NumElem into sum of power-of-two. - InstructionCost Cost = 0; - unsigned NumElemDone = 0; - for (unsigned NumElemLeft = NumElem, Factor; - Factor = PowerOf2Floor(NumElemLeft), NumElemLeft > 0; - NumElemLeft -= Factor) { - Type *SubTy = FixedVectorType::get(VTy->getScalarType(), Factor); - unsigned SubTyBytes = SubTy->getPrimitiveSizeInBits() / 8; - - Cost += - getMemoryOpCost(Opcode, SubTy, Alignment, AddressSpace, CostKind); - - std::pair LST = TLI->getTypeLegalizationCost(DL, SubTy); - if (!LST.second.isVector()) { - APInt DemandedElts = - APInt::getBitsSet(NumElem, NumElemDone, NumElemDone + Factor); - Cost += getScalarizationOverhead(VTy, DemandedElts, - Opcode == Instruction::Load, - Opcode == Instruction::Store); - } - - NumElemDone += Factor; - Alignment = commonAlignment(Alignment.valueOrOne(), SubTyBytes); - } - assert(NumElemDone == NumElem && "Processed wrong element count?"); - return Cost; - } - } - // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Src); - // Each load/store unit costs 1. - int Cost = LT.first * 1; + auto *VTy = dyn_cast(Src); - // This isn't exactly right. We're using slow unaligned 32-byte accesses as a - // proxy for a double-pumped AVX memory interface such as on Sandybridge. - if (LT.second.getStoreSize() == 32 && ST->isUnalignedMem32Slow()) - Cost *= 2; + // Handle the simple case of non-vectors. + // NOTE: this assumes that legalizatio never creates vector from scalars! + if (!VTy || !LT.second.isVector()) + // Each load/store unit costs 1. + return LT.first * 1; + + Type *EltTy = VTy->getElementType(); + + const int EltTyBits = DL.getTypeSizeInBits(VTy->getElementType()); + assert(EltTyBits > 0 && "Sizeless type?"); + assert(EltTyBits % 8 == 0 && "Non-byte-sized type?"); + const int EltTyBytes = EltTyBits / 8; + assert(EltTyBytes != 0); + + InstructionCost Cost = 0; + + // Source of truth: how many elements were there in the original IR vector? + const unsigned SrcNumElt = VTy->getNumElements(); + + // How far have we gotten? + int NumEltRemaining = SrcNumElt; + auto NumEltDone = [&]() { return SrcNumElt - NumEltRemaining; }; + + assert(LT.second.getSizeInBits() % 8 == 0 && "Non-byte-sized legal type?"); + const int MaxLegalOpSizeBytes = LT.second.getSizeInBits() / 8; + assert(MaxLegalOpSizeBytes != 0); + + // With what size are we currently operating? + int CurrOpSizeBytes = MaxLegalOpSizeBytes; + int CurrNumEltPerOp = CurrOpSizeBytes / EltTyBytes; + + // Note that even if we can store 64 bits of an XMM, we still operate on XMM. + const unsigned XMMBits = 128; + assert(XMMBits % EltTyBits == 0 && "Filing XMM with EltTy leaves padding."); + const int NumEltPerXMM = XMMBits / EltTyBits; + + auto *XMMVecTy = FixedVectorType::get(EltTy, NumEltPerXMM); + + for (int SubVecEltsLeft = 0; NumEltRemaining > 0; + CurrOpSizeBytes /= 2, CurrNumEltPerOp /= 2) { + assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 && "How'd we get here?"); + assert((((NumEltRemaining * EltTyBytes) < (2 * CurrOpSizeBytes)) || + (CurrOpSizeBytes == MaxLegalOpSizeBytes)) && + "Unless we haven't halved the op size yet, " + "we have less than two op's sized units of work left."); + + auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM + ? FixedVectorType::get(EltTy, CurrNumEltPerOp) + : XMMVecTy; + + assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 && "???"); + auto *CoalescedVecTy = + CurrNumEltPerOp == 1 + ? CurrVecTy + : FixedVectorType::get( + IntegerType::get(Src->getContext(), + EltTyBits * CurrNumEltPerOp), + CurrVecTy->getNumElements() / CurrNumEltPerOp); + assert(DL.getTypeSizeInBits(CoalescedVecTy) == + DL.getTypeSizeInBits(CurrVecTy) && + "coalesciing elements doesn't change vector width."); + + while (NumEltRemaining > 0) { + assert(SubVecEltsLeft >= 0 && "Subreg element count overconsumtion?"); + + // Can we use this vector size, as per the remaining element count? + // Iff the vector is naturally aligned, we can do a wide load regardless. + if (NumEltRemaining < CurrNumEltPerOp && + (Opcode != Instruction::Load || + Alignment.valueOrOne() < CurrOpSizeBytes)) + break; // Try smalled vector size. + + bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0; + + // If we have fully processed the previous reg, we need to replenish it. + if (SubVecEltsLeft == 0) { + SubVecEltsLeft += CurrVecTy->getNumElements(); + // And that's free only for the 0'th subvector of a legalized vector. + if (!Is0thSubVec) + Cost += getShuffleCost(Opcode == Instruction::Load + ? TTI::ShuffleKind::SK_InsertSubvector + : TTI::ShuffleKind::SK_ExtractSubvector, + VTy, None, NumEltDone(), CurrVecTy); + } + + // While we can directly load/store ZMM, YMM, and 64-bit halves of XMM, + // for smaller widths (32/16/8) we have to insert/extract them separately. + // Again, it's free for the 0'th subreg (if op is 32/64 bit wide, + // but let's pretend that it is also true for 16/8 bit wide ops...) + if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) { + int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM; + assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 && ""); + int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp; + APInt DemandedElts = + APInt::getBitsSet(CoalescedVecTy->getNumElements(), + CoalescedVecEltIdx, CoalescedVecEltIdx + 1); + assert(DemandedElts.countPopulation() == 1 && "Inserting single value"); + Cost += getScalarizationOverhead(CoalescedVecTy, DemandedElts, + Opcode == Instruction::Load, + Opcode == Instruction::Store); + } + + // This isn't exactly right. We're using slow unaligned 32-byte accesses + // as a proxy for a double-pumped AVX memory interface such as on + // Sandybridge. + if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow()) + Cost += 2; + else + Cost += 1; + + SubVecEltsLeft -= CurrNumEltPerOp; + NumEltRemaining -= CurrNumEltPerOp; + Alignment = commonAlignment(Alignment.valueOrOne(), CurrOpSizeBytes); + } + } + + assert(NumEltRemaining <= 0 && "Should have processed all the elements."); return Cost; } diff --git a/llvm/test/Analysis/CostModel/X86/load_store.ll b/llvm/test/Analysis/CostModel/X86/load_store.ll --- a/llvm/test/Analysis/CostModel/X86/load_store.ll +++ b/llvm/test/Analysis/CostModel/X86/load_store.ll @@ -22,19 +22,19 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -54,19 +54,19 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -86,19 +86,19 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -160,19 +160,19 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -192,19 +192,19 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -224,19 +224,19 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -296,23 +296,23 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'loads' @@ -328,23 +328,23 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'loads' @@ -360,23 +360,23 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; load i8, i8* undef, align 4 @@ -433,24 +433,24 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'loads_align' @@ -465,24 +465,24 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'loads_align' @@ -497,24 +497,24 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; load i8, i8* undef, align 64