diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -519,57 +519,6 @@ TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { - EVT OrigTy = TLI->getValueType(DL, Ty); - if (!OrigTy.isSimple()) { - // FIXME: We're having to query the throughput cost so that the basic - // implementation tries to generate legalize and scalarization costs. Maybe - // we could hoist the scalarization code here? - if (CostKind != TTI::TCK_CodeSize) - return BaseT::getArithmeticInstrCost(Opcode, Ty, TTI::TCK_RecipThroughput, - Opd1Info, Opd2Info, Opd1PropInfo, - Opd2PropInfo, Args, CxtI); - // Scalarization - - // Check if any of the operands are vector operands. - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); - - bool IsFloat = Ty->isFPOrFPVectorTy(); - // Assume that floating point arithmetic operations cost twice as much as - // integer operations. - unsigned OpCost = (IsFloat ? 2 : 1); - - if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. - // TODO: Once we have extract/insert subvector cost we need to use them. - return LT.first * OpCost; - } - - if (!TLI->isOperationExpand(ISD, LT.second)) { - // If the operation is custom lowered, then assume that the code is twice - // as expensive. - return LT.first * 2 * OpCost; - } - - // Else, assume that we need to scalarize this op. - // TODO: If one of the types get legalized by splitting, handle this - // similarly to what getCastInstrCost() does. - if (auto *VTy = dyn_cast(Ty)) { - unsigned Num = cast(VTy)->getNumElements(); - InstructionCost Cost = getArithmeticInstrCost( - Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo, Args, CxtI); - // Return the cost of multiple scalar invocation plus the cost of - // inserting and extracting the values. - SmallVector Tys(Args.size(), Ty); - return getScalarizationOverhead(VTy, Args, Tys) + Num * Cost; - } - - // We don't know anything about this scalar instruction. - return OpCost; - } // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -12477,6 +12477,6 @@ if (Size <= 256) return Cost; - Cost.first = (Size + 255) / 256; + Cost.first += (Size + 255) / 256; return Cost; } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll --- a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll @@ -15,7 +15,7 @@ ; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef -; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'add_i32' @@ -27,7 +27,7 @@ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i32 = add i32 undef, undef @@ -38,7 +38,7 @@ %v6i32 = add <6 x i32> undef, undef %v7i32 = add <7 x i32> undef, undef %v8i32 = add <8 x i32> undef, undef - %v32i32 = add <32 x i32> undef, undef + %v9i32 = add <9 x i32> undef, undef ret void } @@ -48,11 +48,7 @@ ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef -; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef -; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef -; ALL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef -; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef -; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'add_i64' @@ -60,11 +56,7 @@ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i64 = add i64 undef, undef @@ -72,10 +64,6 @@ %v3i64 = add <3 x i64> undef, undef %v4i64 = add <4 x i64> undef, undef %v5i64 = add <5 x i64> undef, undef - %v6i64 = add <6 x i64> undef, undef - %v7i64 = add <7 x i64> undef, undef - %v8i64 = add <8 x i64> undef, undef - %v16i64 = add <16 x i64> undef, undef ret void } @@ -87,6 +75,8 @@ ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW16-LABEL: 'add_i16' @@ -96,6 +86,8 @@ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST16-SIZE-LABEL: 'add_i16' @@ -105,6 +97,8 @@ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOW16-SIZE-LABEL: 'add_i16' @@ -114,6 +108,8 @@ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i16 = add i16 undef, undef @@ -122,6 +118,8 @@ %v4i16 = add <4 x i16> undef, undef %v5i16 = add <5 x i16> undef, undef %v6i16 = add <6 x i16> undef, undef + %v16i16 = add <16 x i16> undef, undef + %v17i16 = add <17 x i16> undef, undef ret void } @@ -133,6 +131,8 @@ ; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'add_i8' @@ -142,6 +142,8 @@ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i8 = add i8 undef, undef @@ -150,12 +152,14 @@ %v4i8 = add <4 x i8> undef, undef %v5i8 = add <5 x i8> undef, undef %v6i8 = add <6 x i8> undef, undef + %v32i8 = add <32 x i8> undef, undef + %v33i8 = add <33 x i8> undef, undef ret void } define amdgpu_kernel void @sub() #0 { ; FAST16-LABEL: 'sub' -; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef @@ -165,7 +169,7 @@ ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW16-LABEL: 'sub' -; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef @@ -175,7 +179,7 @@ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST16-SIZE-LABEL: 'sub' -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef @@ -185,7 +189,7 @@ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOW16-SIZE-LABEL: 'sub' -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef @@ -194,7 +198,7 @@ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; - %i8 = sub i16 undef, undef + %i8 = sub i8 undef, undef %i16 = sub i16 undef, undef %i32 = sub i32 undef, undef %i64 = sub i64 undef, undef diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll --- a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll @@ -10,6 +10,8 @@ ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #2 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) #2 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v9f32 = call <9 x float> @llvm.fabs.v9f32(<9 x float> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'fabs_f32' @@ -18,6 +20,8 @@ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #2 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) #2 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v9f32 = call <9 x float> @llvm.fabs.v9f32(<9 x float> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = call float @llvm.fabs.f32(float undef) #1 @@ -25,6 +29,8 @@ %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #1 %v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #1 %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #1 + %v8f32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) #1 + %v9f32 = call <9 x float> @llvm.fabs.v9f32(<9 x float> undef) #1 ret void } @@ -34,6 +40,7 @@ ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #2 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f64 = call <5 x double> @llvm.fabs.v5f64(<5 x double> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'fabs_f64' @@ -41,12 +48,14 @@ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #2 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f64 = call <5 x double> @llvm.fabs.v5f64(<5 x double> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f64 = call double @llvm.fabs.f64(double undef) #1 %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #1 %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #1 %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #1 + %v5f64 = call <5 x double> @llvm.fabs.v5f64(<5 x double> undef) #1 ret void } @@ -57,6 +66,8 @@ ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #2 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef) #2 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = call <17 x half> @llvm.fabs.v17f16(<17 x half> undef) #2 ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'fabs_f16' @@ -65,6 +76,8 @@ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #2 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef) #2 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = call <17 x half> @llvm.fabs.v17f16(<17 x half> undef) #2 ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = call half @llvm.fabs.f16(half undef) #1 @@ -72,6 +85,8 @@ %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #1 %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #1 %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #1 + %v16f16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef) #1 + %v17f16 = call <17 x half> @llvm.fabs.v17f16(<17 x half> undef) #1 ret void } @@ -80,17 +95,22 @@ declare <3 x float> @llvm.fabs.v3f32(<3 x float>) #1 declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #1 declare <5 x float> @llvm.fabs.v5f32(<5 x float>) #1 +declare <8 x float> @llvm.fabs.v8f32(<8 x float>) #1 +declare <9 x float> @llvm.fabs.v9f32(<9 x float>) #1 declare double @llvm.fabs.f64(double) #1 declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #1 declare <3 x double> @llvm.fabs.v3f64(<3 x double>) #1 declare <4 x double> @llvm.fabs.v4f64(<4 x double>) #1 +declare <5 x double> @llvm.fabs.v5f64(<5 x double>) #1 declare half @llvm.fabs.f16(half) #1 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1 declare <3 x half> @llvm.fabs.v3f16(<3 x half>) #1 declare <4 x half> @llvm.fabs.v4f16(<4 x half>) #1 declare <5 x half> @llvm.fabs.v5f16(<5 x half>) #1 +declare <16 x half> @llvm.fabs.v16f16(<16 x half>) #1 +declare <17 x half> @llvm.fabs.v17f16(<17 x half>) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll --- a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll @@ -14,6 +14,8 @@ ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fadd <8 x float> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fadd <9 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; NOPACKEDF32-LABEL: 'fadd_f32' @@ -22,6 +24,8 @@ ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef +; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fadd <8 x float> undef, undef +; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fadd <9 x float> undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX90A-FASTF64-SIZE-LABEL: 'fadd_f32' @@ -30,6 +34,8 @@ ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef +; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fadd <8 x float> undef, undef +; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fadd <9 x float> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; NOPACKEDF32-SIZE-LABEL: 'fadd_f32' @@ -38,6 +44,8 @@ ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef +; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fadd <8 x float> undef, undef +; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fadd <9 x float> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = fadd float undef, undef @@ -45,6 +53,8 @@ %v3f32 = fadd <3 x float> undef, undef %v4f32 = fadd <4 x float> undef, undef %v5f32 = fadd <5 x float> undef, undef + %v8f32 = fadd <8 x float> undef, undef + %v9f32 = fadd <9 x float> undef, undef ret void } @@ -54,7 +64,7 @@ ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fadd <2 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fadd <3 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fadd <4 x double> undef, undef -; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fadd <5 x double> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fadd <5 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FASTF64-LABEL: 'fadd_f64' @@ -62,7 +72,7 @@ ; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fadd <2 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fadd <3 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fadd <4 x double> undef, undef -; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fadd <5 x double> undef, undef +; FASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fadd <5 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOWF64-LABEL: 'fadd_f64' @@ -70,7 +80,7 @@ ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fadd <2 x double> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fadd <3 x double> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fadd <4 x double> undef, undef -; SLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fadd <5 x double> undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = fadd <5 x double> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX90A-FASTF64-SIZE-LABEL: 'fadd_f64' @@ -78,7 +88,7 @@ ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fadd <2 x double> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fadd <3 x double> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fadd <4 x double> undef, undef -; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fadd <5 x double> undef, undef +; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fadd <5 x double> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; NOPACKEDF32-SIZE-LABEL: 'fadd_f64' @@ -86,7 +96,7 @@ ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fadd <2 x double> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fadd <3 x double> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fadd <4 x double> undef, undef -; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fadd <5 x double> undef, undef +; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fadd <5 x double> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f64 = fadd double undef, undef @@ -103,7 +113,9 @@ ; FASTF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fadd <2 x half> undef, undef ; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fadd <3 x half> undef, undef ; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef -; FASTF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef +; FASTF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fadd <5 x half> undef, undef +; FASTF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fadd <16 x half> undef, undef +; FASTF16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fadd <17 x half> undef, undef ; FASTF16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOWF64-LABEL: 'fadd_f16' @@ -111,7 +123,9 @@ ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fadd <2 x half> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fadd <3 x half> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fadd <4 x half> undef, undef -; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fadd <5 x half> undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fadd <16 x half> undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fadd <17 x half> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FASTF16-SIZE-LABEL: 'fadd_f16' @@ -119,7 +133,9 @@ ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fadd <2 x half> undef, undef ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fadd <3 x half> undef, undef ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef -; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef +; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fadd <5 x half> undef, undef +; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fadd <16 x half> undef, undef +; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fadd <17 x half> undef, undef ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOWF64-SIZE-LABEL: 'fadd_f16' @@ -127,7 +143,9 @@ ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fadd <2 x half> undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fadd <3 x half> undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fadd <4 x half> undef, undef -; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fadd <5 x half> undef, undef +; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef +; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fadd <16 x half> undef, undef +; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fadd <17 x half> undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fadd half undef, undef @@ -135,6 +153,8 @@ %v3f16 = fadd <3 x half> undef, undef %v4f16 = fadd <4 x half> undef, undef %v5f16 = fadd <5 x half> undef, undef + %v16f16 = fadd <16 x half> undef, undef + %v17f16 = fadd <17 x half> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll --- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll @@ -19,6 +19,8 @@ ; ALL-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v8f32 = fdiv <8 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %v9f32 = fdiv <9 x float> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'fdiv_f32_ieee' @@ -27,6 +29,8 @@ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v8f32 = fdiv <8 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v9f32 = fdiv <9 x float> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = fdiv float undef, undef @@ -34,6 +38,8 @@ %v3f32 = fdiv <3 x float> undef, undef %v4f32 = fdiv <4 x float> undef, undef %v5f32 = fdiv <5 x float> undef, undef + %v8f32 = fdiv <8 x float> undef, undef + %v9f32 = fdiv <9 x float> undef, undef ret void } @@ -44,6 +50,8 @@ ; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f32 = fdiv <3 x float> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = fdiv <4 x float> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v5f32 = fdiv <5 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8f32 = fdiv <8 x float> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v9f32 = fdiv <9 x float> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'fdiv_f32_ftzdaz' @@ -52,6 +60,8 @@ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v8f32 = fdiv <8 x float> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %v9f32 = fdiv <9 x float> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = fdiv float undef, undef @@ -59,6 +69,8 @@ %v3f32 = fdiv <3 x float> undef, undef %v4f32 = fdiv <4 x float> undef, undef %v5f32 = fdiv <5 x float> undef, undef + %v8f32 = fdiv <8 x float> undef, undef + %v9f32 = fdiv <9 x float> undef, undef ret void } @@ -68,7 +80,7 @@ ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> undef, undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> undef, undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> undef, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CISLOWF64-LABEL: 'fdiv_f64' @@ -76,7 +88,7 @@ ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> undef, undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> undef, undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIFASTF64-LABEL: 'fdiv_f64' @@ -84,7 +96,7 @@ ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> undef, undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> undef, undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SISLOWF64-LABEL: 'fdiv_f64' @@ -92,7 +104,7 @@ ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> undef, undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> undef, undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FP16-LABEL: 'fdiv_f64' @@ -100,7 +112,7 @@ ; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> undef, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CI-SIZE-LABEL: 'fdiv_f64' @@ -108,7 +120,7 @@ ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> undef, undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> undef, undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SI-SIZE-LABEL: 'fdiv_f64' @@ -116,7 +128,7 @@ ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> undef, undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> undef, undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FP16-SIZE-LABEL: 'fdiv_f64' @@ -124,7 +136,7 @@ ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> undef, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f64 = fdiv double undef, undef @@ -141,7 +153,9 @@ ; NOFP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> undef, undef ; NOFP16-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; NOFP16-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> undef, undef -; NOFP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; NOFP16-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; NOFP16-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %v16f16 = fdiv <16 x half> undef, undef +; NOFP16-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %v17f16 = fdiv <17 x half> undef, undef ; NOFP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FP16-LABEL: 'fdiv_f16_f32ieee' @@ -149,7 +163,9 @@ ; FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v16f16 = fdiv <16 x half> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %v17f16 = fdiv <17 x half> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ieee' @@ -157,7 +173,9 @@ ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef -; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v16f16 = fdiv <16 x half> undef, undef +; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %v17f16 = fdiv <17 x half> undef, undef ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FP16-SIZE-LABEL: 'fdiv_f16_f32ieee' @@ -165,7 +183,9 @@ ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = fdiv <2 x half> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16f16 = fdiv <16 x half> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v17f16 = fdiv <17 x half> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fdiv half undef, undef @@ -173,6 +193,8 @@ %v3f16 = fdiv <3 x half> undef, undef %v4f16 = fdiv <4 x half> undef, undef %v5f16 = fdiv <5 x half> undef, undef + %v16f16 = fdiv <16 x half> undef, undef + %v17f16 = fdiv <17 x half> undef, undef ret void } @@ -182,7 +204,9 @@ ; NOFP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2f16 = fdiv <2 x half> undef, undef ; NOFP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; NOFP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f16 = fdiv <4 x half> undef, undef -; NOFP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; NOFP16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; NOFP16-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16f16 = fdiv <16 x half> undef, undef +; NOFP16-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %v17f16 = fdiv <17 x half> undef, undef ; NOFP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FP16-LABEL: 'fdiv_f16_f32ftzdaz' @@ -190,7 +214,9 @@ ; FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef -; FP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v16f16 = fdiv <16 x half> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %v17f16 = fdiv <17 x half> undef, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz' @@ -198,7 +224,9 @@ ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> undef, undef ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> undef, undef -; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %v16f16 = fdiv <16 x half> undef, undef +; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %v17f16 = fdiv <17 x half> undef, undef ; NOFP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz' @@ -206,7 +234,9 @@ ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = fdiv <2 x half> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = fdiv <3 x half> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16f16 = fdiv <16 x half> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v17f16 = fdiv <17 x half> undef, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fdiv half undef, undef @@ -214,6 +244,8 @@ %v3f16 = fdiv <3 x half> undef, undef %v4f16 = fdiv <4 x half> undef, undef %v5f16 = fdiv <5 x half> undef, undef + %v16f16 = fdiv <16 x half> undef, undef + %v17f16 = fdiv <17 x half> undef, undef ret void } @@ -223,7 +255,7 @@ ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> , undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef @@ -233,7 +265,7 @@ ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> , undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> , undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> , undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CISLOWF64-LABEL: 'rcp_ieee' @@ -241,7 +273,7 @@ ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> , undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef @@ -251,7 +283,7 @@ ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> , undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIFASTF64-LABEL: 'rcp_ieee' @@ -259,7 +291,7 @@ ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> , undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef @@ -269,7 +301,7 @@ ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> , undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> , undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> , undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SISLOWF64-LABEL: 'rcp_ieee' @@ -277,7 +309,7 @@ ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> , undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> , undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> , undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef @@ -287,7 +319,7 @@ ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> , undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> , undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> , undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FP16-LABEL: 'rcp_ieee' @@ -295,7 +327,7 @@ ; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> , undef @@ -305,7 +337,7 @@ ; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CI-SIZE-LABEL: 'rcp_ieee' @@ -313,7 +345,7 @@ ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> , undef @@ -323,7 +355,7 @@ ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SI-SIZE-LABEL: 'rcp_ieee' @@ -331,7 +363,7 @@ ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> , undef @@ -341,7 +373,7 @@ ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FP16-SIZE-LABEL: 'rcp_ieee' @@ -349,7 +381,7 @@ ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> , undef @@ -359,7 +391,7 @@ ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fdiv half 1.0, undef @@ -396,7 +428,7 @@ ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> , undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> , undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> , undef -; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> , undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> , undef ; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CISLOWF64-LABEL: 'rcp_ftzdaz' @@ -414,7 +446,7 @@ ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef -; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> , undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> , undef ; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIFASTF64-LABEL: 'rcp_ftzdaz' @@ -432,7 +464,7 @@ ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> , undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> , undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> , undef -; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> , undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> , undef ; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SISLOWF64-LABEL: 'rcp_ftzdaz' @@ -450,7 +482,7 @@ ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> , undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> , undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> , undef -; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> , undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> , undef ; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FP16-LABEL: 'rcp_ftzdaz' @@ -458,7 +490,7 @@ ; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef ; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> , undef @@ -468,7 +500,7 @@ ; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> , undef -; FP16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> , undef ; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; CI-SIZE-LABEL: 'rcp_ftzdaz' @@ -476,7 +508,7 @@ ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> , undef @@ -486,7 +518,7 @@ ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef -; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> , undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> , undef ; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SI-SIZE-LABEL: 'rcp_ftzdaz' @@ -494,7 +526,7 @@ ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> , undef @@ -504,7 +536,7 @@ ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> , undef -; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> , undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> , undef ; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FP16-SIZE-LABEL: 'rcp_ftzdaz' @@ -512,7 +544,7 @@ ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = fdiv <5 x half> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> , undef @@ -522,7 +554,7 @@ ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> , undef -; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> , undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> , undef ; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fdiv half 1.0, undef diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll --- a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll @@ -14,6 +14,8 @@ ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fmul <9 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; F32-LABEL: 'fmul_f32' @@ -22,6 +24,8 @@ ; F32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef ; F32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef ; F32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef +; F32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef +; F32-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fmul <9 x float> undef, undef ; F32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX90A-SIZE-LABEL: 'fmul_f32' @@ -30,6 +34,8 @@ ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef +; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef +; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fmul <9 x float> undef, undef ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fmul_f32' @@ -38,6 +44,8 @@ ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fmul <9 x float> undef, undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = fmul float undef, undef @@ -45,6 +53,8 @@ %v3f32 = fmul <3 x float> undef, undef %v4f32 = fmul <4 x float> undef, undef %v5f32 = fmul <5 x float> undef, undef + %v8f32 = fmul <8 x float> undef, undef + %v9f32 = fmul <9 x float> undef, undef ret void } @@ -54,7 +64,7 @@ ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fmul <4 x double> undef, undef -; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fmul <5 x double> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fmul <5 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FASTF64-LABEL: 'fmul_f64' @@ -62,7 +72,7 @@ ; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fmul <4 x double> undef, undef -; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fmul <5 x double> undef, undef +; FASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fmul <5 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW-LABEL: 'fmul_f64' @@ -70,7 +80,7 @@ ; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fmul <2 x double> undef, undef ; SLOW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fmul <3 x double> undef, undef ; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fmul <4 x double> undef, undef -; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fmul <5 x double> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = fmul <5 x double> undef, undef ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX90A-SIZE-LABEL: 'fmul_f64' @@ -78,7 +88,7 @@ ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fmul <4 x double> undef, undef -; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fmul <5 x double> undef, undef +; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fmul <5 x double> undef, undef ; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fmul_f64' @@ -86,7 +96,7 @@ ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fmul <4 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fmul <5 x double> undef, undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fmul <5 x double> undef, undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f64 = fmul double undef, undef @@ -103,7 +113,9 @@ ; GFX9-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef ; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef ; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef -; GFX9-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef +; GFX9-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fmul <5 x half> undef, undef +; GFX9-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fmul <16 x half> undef, undef +; GFX9-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fmul <17 x half> undef, undef ; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW-LABEL: 'fmul_f16' @@ -111,7 +123,9 @@ ; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef ; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef ; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fmul <5 x half> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fmul <16 x half> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fmul <17 x half> undef, undef ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX9-SIZE-LABEL: 'fmul_f16' @@ -119,7 +133,9 @@ ; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef ; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef ; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef -; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef +; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fmul <5 x half> undef, undef +; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fmul <16 x half> undef, undef +; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fmul <17 x half> undef, undef ; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOW-SIZE-LABEL: 'fmul_f16' @@ -127,7 +143,9 @@ ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef -; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fmul <5 x half> undef, undef +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fmul <16 x half> undef, undef +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fmul <17 x half> undef, undef ; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fmul half undef, undef @@ -135,6 +153,8 @@ %v3f16 = fmul <3 x half> undef, undef %v4f16 = fmul <4 x half> undef, undef %v5f16 = fmul <5 x half> undef, undef + %v16f16 = fmul <16 x half> undef, undef + %v17f16 = fmul <17 x half> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll --- a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll @@ -10,6 +10,8 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = fneg <3 x float> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = fneg <4 x float> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = fneg <5 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = fneg <8 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v9f32 = fneg <9 x float> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIZE-LABEL: 'fneg_f32' @@ -18,6 +20,8 @@ ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = fneg <3 x float> undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = fneg <4 x float> undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = fneg <5 x float> undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = fneg <8 x float> undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v9f32 = fneg <9 x float> undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = fneg float undef @@ -25,6 +29,8 @@ %v3f32 = fneg <3 x float> undef %v4f32 = fneg <4 x float> undef %v5f32 = fneg <5 x float> undef + %v8f32 = fneg <8 x float> undef + %v9f32 = fneg <9 x float> undef ret void } @@ -59,7 +65,9 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fneg <2 x half> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = fneg <3 x half> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = fneg <4 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fneg <5 x half> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = fneg <5 x half> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = fneg <16 x half> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = fneg <17 x half> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SIZE-LABEL: 'fneg_f16' @@ -67,7 +75,9 @@ ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fneg <2 x half> undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = fneg <3 x half> undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = fneg <4 x half> undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fneg <5 x half> undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = fneg <5 x half> undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = fneg <16 x half> undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = fneg <17 x half> undef ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fneg half undef @@ -75,5 +85,7 @@ %v3f16 = fneg <3 x half> undef %v4f16 = fneg <4 x half> undef %v5f16 = fneg <5 x half> undef + %v16f16 = fneg <16 x half> undef + %v17f16 = fneg <17 x half> undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll --- a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll @@ -14,6 +14,8 @@ ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; NOPACKEDF32-LABEL: 'fsub_f32' @@ -22,6 +24,8 @@ ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef +; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef +; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f32' @@ -30,6 +34,8 @@ ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef +; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef +; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; NOPACKEDF32-SIZE-LABEL: 'fsub_f32' @@ -38,6 +44,8 @@ ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef +; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef +; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = fsub float undef, undef @@ -45,6 +53,8 @@ %v3f32 = fsub <3 x float> undef, undef %v4f32 = fsub <4 x float> undef, undef %v5f32 = fsub <5 x float> undef, undef + %v8f32 = fsub <8 x float> undef, undef + %v9f32 = fsub <9 x float> undef, undef ret void } @@ -54,7 +64,7 @@ ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef -; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fsub <5 x double> undef, undef +; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fsub <5 x double> undef, undef ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FASTF64-LABEL: 'fsub_f64' @@ -62,7 +72,7 @@ ; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef -; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fsub <5 x double> undef, undef +; FASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fsub <5 x double> undef, undef ; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOWF64-LABEL: 'fsub_f64' @@ -70,7 +80,7 @@ ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fsub <2 x double> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fsub <3 x double> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fsub <4 x double> undef, undef -; SLOWF64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fsub <5 x double> undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = fsub <5 x double> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f64' @@ -78,7 +88,7 @@ ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef -; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fsub <5 x double> undef, undef +; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fsub <5 x double> undef, undef ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; NOPACKEDF32-SIZE-LABEL: 'fsub_f64' @@ -86,7 +96,7 @@ ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef -; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fsub <5 x double> undef, undef +; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fsub <5 x double> undef, undef ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f64 = fsub double undef, undef @@ -103,7 +113,9 @@ ; FASTF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef ; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef ; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef -; FASTF16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = fsub <5 x half> undef, undef +; FASTF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef +; FASTF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef +; FASTF16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef ; FASTF16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOWF64-LABEL: 'fsub_f16' @@ -111,7 +123,9 @@ ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef -; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fsub <5 x half> undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fsub <5 x half> undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fsub <16 x half> undef, undef +; SLOWF64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fsub <17 x half> undef, undef ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FASTF16-SIZE-LABEL: 'fsub_f16' @@ -119,7 +133,9 @@ ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef -; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = fsub <5 x half> undef, undef +; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef +; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef +; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOWF64-SIZE-LABEL: 'fsub_f16' @@ -127,7 +143,9 @@ ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef -; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fsub <5 x half> undef, undef +; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fsub <5 x half> undef, undef +; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fsub <16 x half> undef, undef +; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fsub <17 x half> undef, undef ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f16 = fsub half undef, undef @@ -135,5 +153,7 @@ %v3f16 = fsub <3 x half> undef, undef %v4f16 = fsub <4 x half> undef, undef %v5f16 = fsub <5 x half> undef, undef + %v16f16 = fsub <16 x half> undef, undef + %v17f16 = fsub <17 x half> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll --- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll @@ -12,6 +12,8 @@ ; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3i32 = mul <3 x i32> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i32 = mul <4 x i32> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i32 = mul <5 x i32> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i32 = mul <8 x i32> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v9i32 = mul <9 x i32> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'mul_i32' @@ -20,6 +22,8 @@ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i32 = mul <3 x i32> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = mul <4 x i32> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i32 = mul <5 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = mul <8 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v9i32 = mul <9 x i32> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i32 = mul i32 undef, undef @@ -27,6 +31,8 @@ %v3i32 = mul <3 x i32> undef, undef %v4i32 = mul <4 x i32> undef, undef %v5i32 = mul <5 x i32> undef, undef + %v8i32 = mul <8 x i32> undef, undef + %v9i32 = mul <9 x i32> undef, undef ret void } @@ -36,8 +42,7 @@ ; ALL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2i64 = mul <2 x i64> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v3i64 = mul <3 x i64> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v4i64 = mul <4 x i64> undef, undef -; ALL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v5i64 = mul <4 x i64> undef, undef -; ALL-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v8i64 = mul <8 x i64> undef, undef +; ALL-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %v5i64 = mul <5 x i64> undef, undef ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'mul_i64' @@ -45,16 +50,14 @@ ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2i64 = mul <2 x i64> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v3i64 = mul <3 x i64> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4i64 = mul <4 x i64> undef, undef -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = mul <4 x i64> undef, undef -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v8i64 = mul <8 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %v5i64 = mul <5 x i64> undef, undef ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i64 = mul i64 undef, undef %v2i64 = mul <2 x i64> undef, undef %v3i64 = mul <3 x i64> undef, undef %v4i64 = mul <4 x i64> undef, undef - %v5i64 = mul <4 x i64> undef, undef - %v8i64 = mul <8 x i64> undef, undef + %v5i64 = mul <5 x i64> undef, undef ret void } @@ -64,7 +67,9 @@ ; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16 = mul <2 x i16> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3i16 = mul <3 x i16> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i16 = mul <4 x i16> undef, undef -; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v5i16 = mul <5 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16i16 = mul <16 x i16> undef, undef +; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v17i16 = mul <17 x i16> undef, undef ; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST16-LABEL: 'mul_i16' @@ -72,7 +77,9 @@ ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i16 = mul <2 x i16> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3i16 = mul <3 x i16> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i16 = mul <4 x i16> undef, undef -; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = mul <5 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5i16 = mul <5 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i16 = mul <16 x i16> undef, undef +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v17i16 = mul <17 x i16> undef, undef ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW16-SIZE-LABEL: 'mul_i16' @@ -80,7 +87,9 @@ ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i16 = mul <2 x i16> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3i16 = mul <3 x i16> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i16 = mul <4 x i16> undef, undef -; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v5i16 = mul <5 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i16 = mul <16 x i16> undef, undef +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v17i16 = mul <17 x i16> undef, undef ; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; FAST16-SIZE-LABEL: 'mul_i16' @@ -88,7 +97,9 @@ ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = mul <2 x i16> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = mul <3 x i16> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = mul <4 x i16> undef, undef -; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = mul <5 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = mul <16 x i16> undef, undef +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v17i16 = mul <17 x i16> undef, undef ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i16 = mul i16 undef, undef @@ -96,6 +107,8 @@ %v3i16 = mul <3 x i16> undef, undef %v4i16 = mul <4 x i16> undef, undef %v5i16 = mul <5 x i16> undef, undef + %v16i16 = mul <16 x i16> undef, undef + %v17i16 = mul <17 x i16> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll --- a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll @@ -26,7 +26,7 @@ ; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef -; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = shl <5 x i64> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW64-LABEL: 'shl' @@ -49,7 +49,7 @@ ; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = shl <2 x i64> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = shl <3 x i64> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = shl <4 x i64> undef, undef -; SLOW64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = shl <5 x i64> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5i64 = shl <5 x i64> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST64-SIZE-LABEL: 'shl' @@ -72,7 +72,7 @@ ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef -; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = shl <5 x i64> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOW64-SIZE-LABEL: 'shl' @@ -95,7 +95,7 @@ ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef -; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = shl <5 x i64> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i8 = shl i8 undef, undef @@ -142,7 +142,7 @@ ; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef -; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = lshr <5 x i64> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW64-LABEL: 'lshr' @@ -165,7 +165,7 @@ ; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = lshr <2 x i64> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = lshr <3 x i64> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = lshr <4 x i64> undef, undef -; SLOW64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = lshr <5 x i64> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5i64 = lshr <5 x i64> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST64-SIZE-LABEL: 'lshr' @@ -188,7 +188,7 @@ ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef -; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = lshr <5 x i64> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOW64-SIZE-LABEL: 'lshr' @@ -211,7 +211,7 @@ ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef -; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = lshr <5 x i64> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i8 = lshr i8 undef, undef @@ -258,7 +258,7 @@ ; FAST64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef -; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef +; FAST64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = ashr <5 x i64> undef, undef ; FAST64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; SLOW64-LABEL: 'ashr' @@ -281,7 +281,7 @@ ; SLOW64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = ashr <2 x i64> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = ashr <3 x i64> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = ashr <4 x i64> undef, undef -; SLOW64-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = ashr <5 x i64> undef, undef +; SLOW64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5i64 = ashr <5 x i64> undef, undef ; SLOW64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; FAST64-SIZE-LABEL: 'ashr' @@ -304,7 +304,7 @@ ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef -; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef +; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = ashr <5 x i64> undef, undef ; FAST64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLOW64-SIZE-LABEL: 'ashr' @@ -327,7 +327,7 @@ ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef -; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef +; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = ashr <5 x i64> undef, undef ; SLOW64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %i8 = ashr i8 undef, undef