This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Improve reduction fadd/fmul costs
ClosedPublic

Authored by dmgreen on Sep 1 2023, 10:23 AM.

Download Raw Diff

Details

Reviewers

samtebbs
NickGuy
SjoerdMeijer

Commits

rG4530f0291664: [ARM] Improve reduction fadd/fmul costs

Summary

This adds some basic fadd/fmul reduction costs for MVE/NEON. It reduces by halving the vector size until it gets scalarized, with some additional costs for fp16, which may require extracting the top lanes.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

dmgreen created this revision.Sep 1 2023, 10:23 AM

Herald added a project: Restricted Project. · View Herald TranscriptSep 1 2023, 10:23 AM

Herald added subscribers: hiraditya, kristof.beyls. · View Herald Transcript

dmgreen requested review of this revision.Sep 1 2023, 10:23 AM

Herald added a project: Restricted Project. · View Herald TranscriptSep 1 2023, 10:23 AM

Harbormaster completed remote builds in B256297: Diff 555428.Sep 1 2023, 10:24 AM

Looks reasonable

This revision is now accepted and ready to land.Sep 4 2023, 2:08 AM

dmgreen mentioned this in rG5afb161ed57d: [ARM] Add various vector reduce costmodel tests. NFC.Sep 4 2023, 2:51 AM

This revision was landed with ongoing or failed builds.Sep 4 2023, 3:37 AM

Closed by commit rG4530f0291664: [ARM] Improve reduction fadd/fmul costs (authored by dmgreen). · Explain Why

This revision was automatically updated to reflect the committed changes.

dmgreen added a commit: rG4530f0291664: [ARM] Improve reduction fadd/fmul costs.

Revision Contents

Path

Size

llvm/

lib/

Target/

ARM/

ARMTargetTransformInfo.cpp

39 lines

test/

Analysis/

CostModel/

ARM/

reduce-fp.ll

96 lines

Diff 555700

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Show First 20 Lines • Show All 1,664 Lines • ▼ Show 20 Lines	InstructionCost ARMTTIImpl::getGatherScatterOpCost(
}		}
return ScalarCost;		return ScalarCost;
}		}

InstructionCost		InstructionCost
ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,		ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
std::optional<FastMathFlags> FMF,		std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) {		TTI::TargetCostKind CostKind) {
if (TTI::requiresOrderedReduction(FMF))
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);

EVT ValVT = TLI->getValueType(DL, ValTy);		EVT ValVT = TLI->getValueType(DL, ValTy);
int ISD = TLI->InstructionOpcodeToISD(Opcode);		int ISD = TLI->InstructionOpcodeToISD(Opcode);
if (!ST->hasMVEIntegerOps() \|\| !ValVT.isSimple() \|\| ISD != ISD::ADD)		unsigned EltSize = ValVT.getScalarSizeInBits();

		// In general floating point reductions are a series of elementwise
		// operations, with free extracts on each step. These are either in-order or
		// treewise depending on whether that is allowed by the fast math flags.
		if ((ISD == ISD::FADD \|\| ISD == ISD::FMUL) &&
		((EltSize == 32 && ST->hasVFP2Base()) \|\|
		(EltSize == 64 && ST->hasFP64()) \|\|
		(EltSize == 16 && ST->hasFullFP16()))) {
		unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
		unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
		InstructionCost VecCost = 0;
		while (!TTI::requiresOrderedReduction(FMF) && isPowerOf2_32(NumElts) &&
		NumElts * EltSize > VecLimit) {
		Type *VecTy = FixedVectorType::get(ValTy->getElementType(), NumElts / 2);
		VecCost += getArithmeticInstrCost(Opcode, VecTy, CostKind);
		NumElts /= 2;
		}

		// For fp16 we need to extract the upper lane elements. MVE can add a
		// VREV+FMIN/MAX to perform another vector step instead.
		InstructionCost ExtractCost = 0;
		if (!TTI::requiresOrderedReduction(FMF) && ST->hasMVEFloatOps() &&
		ValVT.getVectorElementType() == MVT::f16 && NumElts == 8) {
		VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;
		NumElts /= 2;
		} else if (ValVT.getVectorElementType() == MVT::f16)
		ExtractCost = NumElts / 2;

		return VecCost + ExtractCost +
		NumElts *
		getArithmeticInstrCost(Opcode, ValTy->getElementType(), CostKind);
		}

		if (!ST->hasMVEIntegerOps() \|\| !ValVT.isSimple() \|\| ISD != ISD::ADD \|\|
		TTI::requiresOrderedReduction(FMF))
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);		return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);

std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);		std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);

static const CostTblEntry CostTblAdd[]{		static const CostTblEntry CostTblAdd[]{
{ISD::ADD, MVT::v16i8, 1},		{ISD::ADD, MVT::v16i8, 1},
{ISD::ADD, MVT::v8i16, 1},		{ISD::ADD, MVT::v8i16, 1},
{ISD::ADD, MVT::v4i32, 1},		{ISD::ADD, MVT::v4i32, 1},
▲ Show 20 Lines • Show All 780 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/ARM/reduce-fp.ll

	; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
	; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print<cost-model>" 2>&1 -disable-output \| FileCheck %s --check-prefix=CHECK-V8			; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print<cost-model>" 2>&1 -disable-output \| FileCheck %s --check-prefix=CHECK-V8
	; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output \| FileCheck %s --check-prefix=CHECK-MVEFP			; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print<cost-model>" 2>&1 -disable-output \| FileCheck %s --check-prefix=CHECK-MVEFP
	; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print<cost-model>" 2>&1 -disable-output \| FileCheck %s --check-prefix=CHECK-MVEI			; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print<cost-model>" 2>&1 -disable-output \| FileCheck %s --check-prefix=CHECK-MVEI

	target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"			target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

	define void @fadd_strict() {			define void @fadd_strict() {
	; CHECK-V8-LABEL: 'fadd_strict'			; CHECK-V8-LABEL: 'fadd_strict'
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; CHECK-MVEFP-LABEL: 'fadd_strict'			; CHECK-MVEFP-LABEL: 'fadd_strict'
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; CHECK-MVEI-LABEL: 'fadd_strict'			; CHECK-MVEI-LABEL: 'fadd_strict'
	; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)			; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
	; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)			; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
	Show All 22 Lines


	define void @fadd_unordered() {			define void @fadd_unordered() {
	; CHECK-V8-LABEL: 'fadd_unordered'			; CHECK-V8-LABEL: 'fadd_unordered'
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; CHECK-MVEFP-LABEL: 'fadd_unordered'			; CHECK-MVEFP-LABEL: 'fadd_unordered'
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; CHECK-MVEI-LABEL: 'fadd_unordered'			; CHECK-MVEI-LABEL: 'fadd_unordered'
	; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)			; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
	; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)			; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
	Show All 21 Lines
	}			}

	define void @fmul_strict() {			define void @fmul_strict() {
	; CHECK-V8-LABEL: 'fmul_strict'			; CHECK-V8-LABEL: 'fmul_strict'
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; CHECK-MVEFP-LABEL: 'fmul_strict'			; CHECK-MVEFP-LABEL: 'fmul_strict'
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; CHECK-MVEI-LABEL: 'fmul_strict'			; CHECK-MVEI-LABEL: 'fmul_strict'
	; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)			; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
	; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)			; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
	Show All 22 Lines


	define void @fmul_unordered() {			define void @fmul_unordered() {
	; CHECK-V8-LABEL: 'fmul_unordered'			; CHECK-V8-LABEL: 'fmul_unordered'
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
	; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; CHECK-MVEFP-LABEL: 'fmul_unordered'			; CHECK-MVEFP-LABEL: 'fmul_unordered'
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef)
	; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; CHECK-MVEI-LABEL: 'fmul_unordered'			; CHECK-MVEI-LABEL: 'fmul_unordered'
	; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)			; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef)
	; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)			; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef)
	▲ Show 20 Lines • Show All 46 Lines • Show Last 20 Lines