diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3209,42 +3209,48 @@ return TTI::TCC_Basic; } - // Handle non-power-of-two vectors such as <3 x float> - if (auto *VTy = dyn_cast(Src)) { - unsigned NumElem = VTy->getNumElements(); - - // Handle a few common cases: - // <3 x float> - if (NumElem == 3 && VTy->getScalarSizeInBits() == 32) - // Cost = 64 bit store + extract + 32 bit store. - return 3; - - // <3 x double> - if (NumElem == 3 && VTy->getScalarSizeInBits() == 64) - // Cost = 128 bit store + unpack + 64 bit store. - return 3; - - // Assume that all other non-power-of-two numbers are scalarized. - if (!isPowerOf2_32(NumElem)) { - APInt DemandedElts = APInt::getAllOnesValue(NumElem); - InstructionCost Cost = BaseT::getMemoryOpCost( - Opcode, VTy->getScalarType(), Alignment, AddressSpace, CostKind); - int SplitCost = getScalarizationOverhead(VTy, DemandedElts, - Opcode == Instruction::Load, - Opcode == Instruction::Store); - return NumElem * Cost + SplitCost; - } - } - + assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && + "Invalid Opcode"); // Type legalization can't handle structs - if (TLI->getValueType(DL, Src, true) == MVT::Other) + if (TLI->getValueType(DL, Src, true) == MVT::Other) return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); + // Handle non-power-of-two vectors such as <3 x float> and <48 x i16> + if (auto *VTy = dyn_cast(Src)) { + const unsigned NumElem = VTy->getNumElements(); + if (!isPowerOf2_32(NumElem)) { + // Factorize NumElem into sum of power-of-two. + InstructionCost Cost = 0; + unsigned NumElemDone = 0; + for (unsigned NumElemLeft = NumElem, Factor; + Factor = PowerOf2Floor(NumElemLeft), NumElemLeft > 0; + NumElemLeft -= Factor) { + Type *SubTy = FixedVectorType::get(VTy->getScalarType(), Factor); + unsigned SubTyBytes = SubTy->getPrimitiveSizeInBits() / 8; + + Cost += + getMemoryOpCost(Opcode, SubTy, Alignment, AddressSpace, CostKind); + + std::pair LST = TLI->getTypeLegalizationCost(DL, SubTy); + if (!LST.second.isVector()) { + APInt DemandedElts = + APInt::getBitsSet(NumElem, NumElemDone, NumElemDone + Factor); + Cost += getScalarizationOverhead(VTy, DemandedElts, + Opcode == Instruction::Load, + Opcode == Instruction::Store); + } + + NumElemDone += Factor; + Alignment = commonAlignment(Alignment.valueOrOne(), SubTyBytes); + } + assert(NumElemDone == NumElem && "Processed wrong element count?"); + return Cost; + } + } + // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Src); - assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && - "Invalid Opcode"); // Each load/store unit costs 1. int Cost = LT.first * 1; diff --git a/llvm/test/Analysis/CostModel/X86/load_store.ll b/llvm/test/Analysis/CostModel/X86/load_store.ll --- a/llvm/test/Analysis/CostModel/X86/load_store.ll +++ b/llvm/test/Analysis/CostModel/X86/load_store.ll @@ -20,23 +20,23 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'stores' @@ -54,21 +54,21 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 117 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 122 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'stores' @@ -86,21 +86,21 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 61 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 125 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <47 x i16> undef, <47 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i16> undef, <48 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i16> undef, <49 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; store i8 undef, i8* undef, align 4 @@ -157,24 +157,24 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 -; SSE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'loads' @@ -190,23 +190,23 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 -; AVX-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'loads' @@ -222,23 +222,23 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 -; AVX512-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <5 x i16>, <5 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <6 x i16>, <6 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <7 x i16>, <7 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <11 x i16>, <11 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <12 x i16>, <12 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <13 x i16>, <13 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = load <23 x i16>, <23 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = load <24 x i16>, <24 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = load <25 x i16>, <25 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %27 = load <47 x i16>, <47 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <48 x i16>, <48 x i16>* undef, align 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <49 x i16>, <49 x i16>* undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; load i8, i8* undef, align 4