diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3216,49 +3216,81 @@ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); - // Handle non-power-of-two vectors such as <3 x float> and <48 x i16> - if (auto *VTy = dyn_cast(Src)) { - const unsigned NumElem = VTy->getNumElements(); - if (!isPowerOf2_32(NumElem)) { - // Factorize NumElem into sum of power-of-two. - InstructionCost Cost = 0; - unsigned NumElemDone = 0; - for (unsigned NumElemLeft = NumElem, Factor; - Factor = PowerOf2Floor(NumElemLeft), NumElemLeft > 0; - NumElemLeft -= Factor) { - Type *SubTy = FixedVectorType::get(VTy->getScalarType(), Factor); - unsigned SubTyBytes = SubTy->getPrimitiveSizeInBits() / 8; - - Cost += - getMemoryOpCost(Opcode, SubTy, Alignment, AddressSpace, CostKind); - - std::pair LST = TLI->getTypeLegalizationCost(DL, SubTy); - if (!LST.second.isVector()) { - APInt DemandedElts = - APInt::getBitsSet(NumElem, NumElemDone, NumElemDone + Factor); - Cost += getScalarizationOverhead(VTy, DemandedElts, - Opcode == Instruction::Load, - Opcode == Instruction::Store); - } - - NumElemDone += Factor; - Alignment = commonAlignment(Alignment.valueOrOne(), SubTyBytes); - } - assert(NumElemDone == NumElem && "Processed wrong element count?"); - return Cost; - } - } - // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Src); - // Each load/store unit costs 1. - int Cost = LT.first * 1; + errs() << "LT " << *EVT(LT.second).getTypeForEVT(Src->getContext()) << "\n"; - // This isn't exactly right. We're using slow unaligned 32-byte accesses as a - // proxy for a double-pumped AVX memory interface such as on Sandybridge. - if (LT.second.getStoreSize() == 32 && ST->isUnalignedMem32Slow()) - Cost *= 2; + auto *VTy = dyn_cast(Src); + + // Handle the simple case of non-vectors. 
+ // NOTE: this assumes that legalization never creates vector from scalars! + if (!VTy || !LT.second.isVector()) + // Each load/store unit costs 1. + return LT.first * 1; + + InstructionCost Cost = 0; + + // Source of truth: how many elements were there in the original IR vector? + const unsigned SrcNumElt = VTy->getNumElements(); + + // To what vector did we legalize? + const int LegalBytes = LT.second.getSizeInBits() / 8; + const int LegalNumElt = LT.second.getVectorNumElements(); + + int NumEltRemaining = SrcNumElt; + auto NumEltDone = [&]() { return SrcNumElt - NumEltRemaining; }; + + // With what size are we currently operating? + int CurrLegalBytes = LegalBytes; + int CurrLegalNumElt = LegalNumElt; + + auto *LegalizedTy = + EVT(LT.second.getVectorElementType()).getTypeForEVT(Src->getContext()); + int NumEltInXMM = 128 / LegalizedTy->getScalarSizeInBits(); + auto *LegalizedVTy = FixedVectorType::get( + LegalizedTy, LT.first * LT.second.getVectorNumElements()); + + // First, try to use whole-vector operations, with decreasing vector sizes. + for (; NumEltRemaining > 0 && CurrLegalBytes >= 64 / 8; + CurrLegalBytes /= 2, CurrLegalNumElt /= 2) { + assert(CurrLegalNumElt > 0 && "Reduced vector size too much?"); + auto *CurrTy = + FixedVectorType::get(LegalizedVTy->getElementType(), + std::max((int)CurrLegalNumElt, NumEltInXMM)); + while (NumEltRemaining > 0) { + // Can we use this vector size, as per the remaining element count? + // Iff the vector is naturally aligned, we can do a wide load regardless. + if (NumEltRemaining < CurrLegalNumElt && + (Opcode != Instruction::Load || + Alignment.valueOrOne() < CurrLegalBytes)) + break; // Try smaller vector size. + + if (CurrLegalBytes != LegalBytes) { + Cost += getShuffleCost(Opcode == Instruction::Load + ? 
TTI::ShuffleKind::SK_InsertSubvector + : TTI::ShuffleKind::SK_ExtractSubvector, + LegalizedVTy, None, NumEltDone(), CurrTy); + } + + Cost += 1; + NumEltRemaining -= CurrLegalNumElt; + Alignment = commonAlignment(Alignment.valueOrOne(), CurrLegalBytes); + } + } + + // Was that all, or do we still have remaining elements? + if (NumEltRemaining <= 0) + return Cost; + + // The rest will have to be scalarized + // FIXME: we could coalesce loads, but getScalarizationOverhead() won't cope. + // FIXME: what are the legality rules for performing wider scalar loads? + Cost += countPopulation((unsigned)NumEltRemaining); + APInt DemandedElts = APInt::getBitsSet(SrcNumElt, NumEltDone(), SrcNumElt); + Cost += + getScalarizationOverhead(VTy, DemandedElts, Opcode == Instruction::Load, + Opcode == Instruction::Store); return Cost; } diff --git a/llvm/test/Analysis/CostModel/X86/load_store.ll b/llvm/test/Analysis/CostModel/X86/load_store.ll --- a/llvm/test/Analysis/CostModel/X86/load_store.ll +++ b/llvm/test/Analysis/CostModel/X86/load_store.ll @@ -22,19 +22,19 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> 
undef, <5 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found 
an estimated cost of 8 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -54,19 +54,19 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <23 x i16> undef, <23 x i16>* undef, 
align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -86,19 +86,19 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 
4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: 
store <7 x i16> undef, <7 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -160,19 +160,19 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* 
undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an 
estimated cost of 11 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -192,19 +192,19 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x 
i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -224,19 +224,19 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64 ; AVX512-NEXT: Cost Model: 
Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: 
Found an estimated cost of 7 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -289,28 +289,28 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <2 x i32>, 
<2 x i32>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %6 = load <2 x i32>, <2 x i32>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load <4 x i32>, <4 x i32>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <8 x i32>, <8 x i32>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 +; 
SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -321,28 +321,28 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <2 x i32>, <2 x i32>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = load <2 x i32>, <2 x i32>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load <4 x i32>, <4 x i32>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <8 x i32>, <8 x i32>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: 
Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %15 = load <3 x i64>, 
<3 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -353,30 +353,30 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %4 = load i64, i64* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <2 x i32>, <2 x i32>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = load <2 x i32>, <2 x i32>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load <4 x i32>, <4 x i32>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <8 x i32>, <8 x i32>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 -; 
AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 +; AVX512-NEXT: 
Cost Model: Found an estimated cost of 19 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; load i8, i8* undef, align 4 @@ -433,24 +433,24 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 64 -; SSE-NEXT: Cost Model: 
Found an estimated cost of 5 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load 
<5 x i16>, <5 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64 -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 ; SSE-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'loads_align' @@ -465,24 +465,24 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, 
align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'loads_align' @@ -497,24 +497,24 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 -; 
AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 64 +; 
AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 64 -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; load i8, i8* undef, align 64