Diff 527801

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Show First 20 Lines • Show All 1,237 Lines • ▼ Show 20 Lines	CallInst *MaskedStore = IC.Builder.CreateMaskedStore(
VecOp, VecPtr, PtrOp->getPointerAlignment(DL), Pred);		VecOp, VecPtr, PtrOp->getPointerAlignment(DL), Pred);
MaskedStore->copyMetadata(II);		MaskedStore->copyMetadata(II);
return IC.eraseInstFromFunction(II);		return IC.eraseInstFromFunction(II);
}		}

static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {		static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
switch (Intrinsic) {		switch (Intrinsic) {
case Intrinsic::aarch64_sve_fmul:		case Intrinsic::aarch64_sve_fmul:
		case Intrinsic::aarch64_sve_fmul_u:
return Instruction::BinaryOps::FMul;		return Instruction::BinaryOps::FMul;
case Intrinsic::aarch64_sve_fadd:		case Intrinsic::aarch64_sve_fadd:
		case Intrinsic::aarch64_sve_fadd_u:
return Instruction::BinaryOps::FAdd;		return Instruction::BinaryOps::FAdd;
case Intrinsic::aarch64_sve_fsub:		case Intrinsic::aarch64_sve_fsub:
		case Intrinsic::aarch64_sve_fsub_u:
return Instruction::BinaryOps::FSub;		return Instruction::BinaryOps::FSub;
default:		default:
return Instruction::BinaryOpsEnd;		return Instruction::BinaryOpsEnd;
}		}
}		}

static std::optional<Instruction *>		static std::optional<Instruction *>
instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {		instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
Show All 29 Lines	if (auto FMAD =
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,		instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
Intrinsic::aarch64_sve_fmad>(IC, II,		Intrinsic::aarch64_sve_fmad>(IC, II,
false))		false))
return FMAD;		return FMAD;
if (auto MAD = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,		if (auto MAD = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
Intrinsic::aarch64_sve_mad>(		Intrinsic::aarch64_sve_mad>(
IC, II, false))		IC, II, false))
return MAD;		return MAD;
		if (auto FMLA_U =
		instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
		Intrinsic::aarch64_sve_fmla_u>(
		IC, II, true))
		return FMLA_U;
return instCombineSVEVectorBinOp(IC, II);		return instCombineSVEVectorBinOp(IC, II);
}		}
		paulwalker-armUnsubmitted Not Done Reply Inline Actions Is it possible to maintain the original structure of this function by just adding the new `instCombineSVEVectorFuseMulAddSub` entry for aarch64_sve_fmul_u? e.g. if (auto FMLA_U = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u, Intrinsic::aarch64_sve_fmla_u>( IC, II, true)) return FMLA_U; return instCombineSVEVectorBinOp(IC, II); paulwalker-arm: Is it possible to maintain the original structure of this function by just adding the new…
		jolanta.jensenAuthorUnsubmitted Done Reply Inline Actions Yes, there was no fallout from the move. It looks much better now. jolanta.jensen: Yes, there was no fallout from the move. It looks much better now.

static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,		static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
IntrinsicInst &II) {		IntrinsicInst &II) {
if (auto FMLS =		if (auto FMLS =
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,		instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
Intrinsic::aarch64_sve_fmls>(IC, II,		Intrinsic::aarch64_sve_fmls>(IC, II,
true))		true))
return FMLS;		return FMLS;
if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,		if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
Intrinsic::aarch64_sve_mls>(		Intrinsic::aarch64_sve_mls>(
IC, II, true))		IC, II, true))
return MLS;		return MLS;
if (auto FMSB =		if (auto FMSB =
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,		instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
Intrinsic::aarch64_sve_fnmsb>(		Intrinsic::aarch64_sve_fnmsb>(
IC, II, false))		IC, II, false))
return FMSB;		return FMSB;
		if (auto FMLS_U =
		instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
		Intrinsic::aarch64_sve_fmls_u>(
		IC, II, true))
		return FMLS_U;
return instCombineSVEVectorBinOp(IC, II);		return instCombineSVEVectorBinOp(IC, II);
}		}
		paulwalker-armUnsubmitted Not Done Reply Inline Actions Same comment as above relating to maintaining the function's original structure. paulwalker-arm: Same comment as above relating to maintaining the function's original structure.
		jolanta.jensenAuthorUnsubmitted Done Reply Inline Actions Fixed. jolanta.jensen: Fixed.

static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,		static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
IntrinsicInst &II) {		IntrinsicInst &II) {
auto *OpPredicate = II.getOperand(0);		auto *OpPredicate = II.getOperand(0);
auto *OpMultiplicand = II.getOperand(1);		auto *OpMultiplicand = II.getOperand(1);
auto *OpMultiplier = II.getOperand(2);		auto *OpMultiplier = II.getOperand(2);

// Return true if a given instruction is a unit splat value, false otherwise.		// Return true if a given instruction is a unit splat value, false otherwise.
auto IsUnitSplat = [](auto *I) {		auto IsUnitSplat = [](auto *I) {
auto *SplatValue = getSplatValue(I);		auto *SplatValue = getSplatValue(I);
		paulwalker-armUnsubmitted Not Done Reply Inline Actions I'd rather the `instCombineSVEAllActive2VA` and `instCombineSVEAllActive3VA` changes be rolled into a separate patch. That way we have this patch focusing on the existing combines that are being ported to include the newer intrinsics and then a following patch to add new combines for everything else. paulwalker-arm: I'd rather the `instCombineSVEAllActive2VA` and `instCombineSVEAllActive3VA` changes be rolled…
		jolanta.jensenAuthorUnsubmitted Done Reply Inline Actions Removed IR combines from this patch. jolanta.jensen: Removed IR combines from this patch.
if (!SplatValue)		if (!SplatValue)
return false;		return false;
return match(SplatValue, m_FPOne()) \|\| match(SplatValue, m_One());		return match(SplatValue, m_FPOne()) \|\| match(SplatValue, m_One());
};		};

// Return true if a given instruction is an aarch64_sve_dup intrinsic call		// Return true if a given instruction is an aarch64_sve_dup intrinsic call
// with a unit splat value, false otherwise.		// with a unit splat value, false otherwise.
auto IsUnitDup = [](auto *I) {		auto IsUnitDup = [](auto *I) {
▲ Show 20 Lines • Show All 345 Lines • ▼ Show 20 Lines	AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_cntb:		case Intrinsic::aarch64_sve_cntb:
return instCombineSVECntElts(IC, II, 16);		return instCombineSVECntElts(IC, II, 16);
case Intrinsic::aarch64_sve_ptest_any:		case Intrinsic::aarch64_sve_ptest_any:
case Intrinsic::aarch64_sve_ptest_first:		case Intrinsic::aarch64_sve_ptest_first:
case Intrinsic::aarch64_sve_ptest_last:		case Intrinsic::aarch64_sve_ptest_last:
return instCombineSVEPTest(IC, II);		return instCombineSVEPTest(IC, II);
case Intrinsic::aarch64_sve_mul:		case Intrinsic::aarch64_sve_mul:
case Intrinsic::aarch64_sve_fmul:		case Intrinsic::aarch64_sve_fmul:
		case Intrinsic::aarch64_sve_fmul_u:
return instCombineSVEVectorMul(IC, II);		return instCombineSVEVectorMul(IC, II);
case Intrinsic::aarch64_sve_fadd:		case Intrinsic::aarch64_sve_fadd:
		case Intrinsic::aarch64_sve_fadd_u:
case Intrinsic::aarch64_sve_add:		case Intrinsic::aarch64_sve_add:
return instCombineSVEVectorAdd(IC, II);		return instCombineSVEVectorAdd(IC, II);
case Intrinsic::aarch64_sve_fadd_u:
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
Intrinsic::aarch64_sve_fmla_u>(
IC, II, true);
case Intrinsic::aarch64_sve_add_u:		case Intrinsic::aarch64_sve_add_u:
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,		return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
		mgabkaUnsubmitted Not Done Reply Inline Actions Hi Jolanta, I think that code like: if (auto Replacement = instCombineSVEVectorBinOp(IC, II)) return Replacement; return instCombineSVEVectorFuseMulAddSub would be more readable here. mgabka: Hi Jolanta, I think that code like: if (auto Replacement = instCombineSVEVectorBinOp(IC, II))…
Intrinsic::aarch64_sve_mla_u>(		Intrinsic::aarch64_sve_mla_u>(
IC, II, true);		IC, II, true);
case Intrinsic::aarch64_sve_fsub:		case Intrinsic::aarch64_sve_fsub:
		case Intrinsic::aarch64_sve_fsub_u:
case Intrinsic::aarch64_sve_sub:		case Intrinsic::aarch64_sve_sub:
return instCombineSVEVectorSub(IC, II);		return instCombineSVEVectorSub(IC, II);
case Intrinsic::aarch64_sve_fsub_u:
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
Intrinsic::aarch64_sve_fmls_u>(
IC, II, true);
case Intrinsic::aarch64_sve_sub_u:		case Intrinsic::aarch64_sve_sub_u:
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,		return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
Intrinsic::aarch64_sve_mls_u>(		Intrinsic::aarch64_sve_mls_u>(
IC, II, true);		IC, II, true);
case Intrinsic::aarch64_sve_tbl:		case Intrinsic::aarch64_sve_tbl:
return instCombineSVETBL(IC, II);		return instCombineSVETBL(IC, II);
case Intrinsic::aarch64_sve_uunpkhi:		case Intrinsic::aarch64_sve_uunpkhi:
case Intrinsic::aarch64_sve_uunpklo:		case Intrinsic::aarch64_sve_uunpklo:
▲ Show 20 Lines • Show All 1,913 Lines • Show Last 20 Lines

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll

	; RUN: opt -S -passes=instcombine < %s \| FileCheck %s			; RUN: opt -S -passes=instcombine < %s \| FileCheck %s

	target triple = "aarch64-unknown-linux-gnu"			target triple = "aarch64-unknown-linux-gnu"

	declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)			declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)			declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
	declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)			declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

	; SVE intrinsics fmul and fadd should be replaced with regular fmul and fadd			; SVE intrinsics fmul, fmul_u, fadd, fadd_u, fsub and fsub_u should be replaced with regular fmul, fadd and fsub.
	declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)			declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
	define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {			define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
	; CHECK-LABEL: @replace_fmul_intrinsic_half			; CHECK-LABEL: @replace_fmul_intrinsic_half
	; CHECK-NEXT: %1 = fmul fast <vscale x 8 x half> %a, %b			; CHECK-NEXT: %1 = fmul fast <vscale x 8 x half> %a, %b
	; CHECK-NEXT: ret <vscale x 8 x half> %1			; CHECK-NEXT: ret <vscale x 8 x half> %1
	%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)			%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
	%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)			%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
	ret <vscale x 8 x half> %2			ret <vscale x 8 x half> %2
	Show All 14 Lines
	; CHECK-LABEL: @replace_fmul_intrinsic_double			; CHECK-LABEL: @replace_fmul_intrinsic_double
	; CHECK-NEXT: %1 = fmul fast <vscale x 2 x double> %a, %b			; CHECK-NEXT: %1 = fmul fast <vscale x 2 x double> %a, %b
	; CHECK-NEXT: ret <vscale x 2 x double> %1			; CHECK-NEXT: ret <vscale x 2 x double> %1
	%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)			%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
	%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)			%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
	ret <vscale x 2 x double> %2			ret <vscale x 2 x double> %2
	}			}

				; fmul.u on f16 vectors, predicated by ptrue(i32 31) and carrying the `fast`
				; flag: the expected output is a plain `fmul fast` of %a and %b.
				declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
				define <vscale x 8 x half> @replace_fmul_u_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
				; CHECK-LABEL: @replace_fmul_u_intrinsic_half
				; CHECK-NEXT: %1 = fmul fast <vscale x 8 x half> %a, %b
				; CHECK-NEXT: ret <vscale x 8 x half> %1
				%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
				%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
				ret <vscale x 8 x half> %2
				}

				; fmul.u on f32 vectors, predicated by ptrue(i32 31) and carrying the `fast`
				; flag: the expected output is a plain `fmul fast` of %a and %b.
				declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
				define <vscale x 4 x float> @replace_fmul_u_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
				; CHECK-LABEL: @replace_fmul_u_intrinsic_float
				; CHECK-NEXT: %1 = fmul fast <vscale x 4 x float> %a, %b
				; CHECK-NEXT: ret <vscale x 4 x float> %1
				%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
				%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
				ret <vscale x 4 x float> %2
				}

				; fmul.u on f64 vectors, predicated by ptrue(i32 31) and carrying the `fast`
				; flag: the expected output is a plain `fmul fast` of %a and %b.
				declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
				define <vscale x 2 x double> @replace_fmul_u_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
				; CHECK-LABEL: @replace_fmul_u_intrinsic_double
				; CHECK-NEXT: %1 = fmul fast <vscale x 2 x double> %a, %b
				; CHECK-NEXT: ret <vscale x 2 x double> %1
				%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
				%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
				ret <vscale x 2 x double> %2
				}

	declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)			declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
	define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {			define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
	; CHECK-LABEL: @replace_fadd_intrinsic_half			; CHECK-LABEL: @replace_fadd_intrinsic_half
	; CHECK-NEXT: %1 = fadd fast <vscale x 8 x half> %a, %b			; CHECK-NEXT: %1 = fadd fast <vscale x 8 x half> %a, %b
	; CHECK-NEXT: ret <vscale x 8 x half> %1			; CHECK-NEXT: ret <vscale x 8 x half> %1
	%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)			%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
	%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)			%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
	ret <vscale x 8 x half> %2			ret <vscale x 8 x half> %2
	Show All 14 Lines
	; CHECK-LABEL: @replace_fadd_intrinsic_double			; CHECK-LABEL: @replace_fadd_intrinsic_double
	; CHECK-NEXT: %1 = fadd fast <vscale x 2 x double> %a, %b			; CHECK-NEXT: %1 = fadd fast <vscale x 2 x double> %a, %b
	; CHECK-NEXT: ret <vscale x 2 x double> %1			; CHECK-NEXT: ret <vscale x 2 x double> %1
	%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)			%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
	%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)			%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
	ret <vscale x 2 x double> %2			ret <vscale x 2 x double> %2
	}			}

				; fadd.u on f16 vectors, predicated by ptrue(i32 31) and carrying the `fast`
				; flag: the expected output is a plain `fadd fast` of %a and %b.
				declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
				define <vscale x 8 x half> @replace_fadd_u_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
				; CHECK-LABEL: @replace_fadd_u_intrinsic_half
				; CHECK-NEXT: %1 = fadd fast <vscale x 8 x half> %a, %b
				; CHECK-NEXT: ret <vscale x 8 x half> %1
				%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
				%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
				ret <vscale x 8 x half> %2
				}

				; fadd.u on f32 vectors, predicated by ptrue(i32 31) and carrying the `fast`
				; flag: the expected output is a plain `fadd fast` of %a and %b.
				declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
				define <vscale x 4 x float> @replace_fadd_u_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
				; CHECK-LABEL: @replace_fadd_u_intrinsic_float
				; CHECK-NEXT: %1 = fadd fast <vscale x 4 x float> %a, %b
				; CHECK-NEXT: ret <vscale x 4 x float> %1
				%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
				%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
				ret <vscale x 4 x float> %2
				}

				; fadd.u on f64 vectors, predicated by ptrue(i32 31) and carrying the `fast`
				; flag: the expected output is a plain `fadd fast` of %a and %b.
				declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
				define <vscale x 2 x double> @replace_fadd_u_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
				; CHECK-LABEL: @replace_fadd_u_intrinsic_double
				; CHECK-NEXT: %1 = fadd fast <vscale x 2 x double> %a, %b
				; CHECK-NEXT: ret <vscale x 2 x double> %1
				%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
				%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
				ret <vscale x 2 x double> %2
				}

	declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)			declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
	define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {			define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
	; CHECK-LABEL: @replace_fsub_intrinsic_half			; CHECK-LABEL: @replace_fsub_intrinsic_half
	; CHECK-NEXT: %1 = fsub fast <vscale x 8 x half> %a, %b			; CHECK-NEXT: %1 = fsub fast <vscale x 8 x half> %a, %b
	; CHECK-NEXT: ret <vscale x 8 x half> %1			; CHECK-NEXT: ret <vscale x 8 x half> %1
	%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)			%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
	%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)			%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
	ret <vscale x 8 x half> %2			ret <vscale x 8 x half> %2
	}			}

	declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)			declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
	define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {			define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
	; CHECK-LABEL: @replace_fsub_intrinsic_float			; CHECK-LABEL: @replace_fsub_intrinsic_float
	; CHECK-NEXT: %1 = fsub fast <vscale x 4 x float> %a, %b			; CHECK-NEXT: %1 = fsub fast <vscale x 4 x float> %a, %b
	; CHECK-NEXT: ret <vscale x 4 x float> %1			; CHECK-NEXT: ret <vscale x 4 x float> %1
	%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)			%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
	%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)			%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
	ret <vscale x 4 x float> %2			ret <vscale x 4 x float> %2
	}			}


	declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)			declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
	define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {			define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
	; CHECK-LABEL: @replace_fsub_intrinsic_double			; CHECK-LABEL: @replace_fsub_intrinsic_double
	; CHECK-NEXT: %1 = fsub fast <vscale x 2 x double> %a, %b			; CHECK-NEXT: %1 = fsub fast <vscale x 2 x double> %a, %b
	; CHECK-NEXT: ret <vscale x 2 x double> %1			; CHECK-NEXT: ret <vscale x 2 x double> %1
	%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)			%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
	%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)			%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
	ret <vscale x 2 x double> %2			ret <vscale x 2 x double> %2
	Show All 13 Lines
	; CHECK-LABEL: @replace_fsub_intrinsic_no_fast_flag			; CHECK-LABEL: @replace_fsub_intrinsic_no_fast_flag
	; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b			; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b
	; CHECK-NEXT: ret <vscale x 2 x double> %1			; CHECK-NEXT: ret <vscale x 2 x double> %1
	%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)			%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
	%2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)			%2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
	ret <vscale x 2 x double> %2			ret <vscale x 2 x double> %2
	}			}

				; fsub.u on f16 vectors, predicated by ptrue(i32 31) and carrying the `fast`
				; flag: the expected output is a plain `fsub fast` of %a and %b.
				declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
				define <vscale x 8 x half> @replace_fsub_u_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
				; CHECK-LABEL: @replace_fsub_u_intrinsic_half
				; CHECK-NEXT: %1 = fsub fast <vscale x 8 x half> %a, %b
				; CHECK-NEXT: ret <vscale x 8 x half> %1
				%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
				%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
				ret <vscale x 8 x half> %2
				}

				; fsub.u on f32 vectors, predicated by ptrue(i32 31) and carrying the `fast`
				; flag: the expected output is a plain `fsub fast` of %a and %b.
				declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
				define <vscale x 4 x float> @replace_fsub_u_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
				; CHECK-LABEL: @replace_fsub_u_intrinsic_float
				; CHECK-NEXT: %1 = fsub fast <vscale x 4 x float> %a, %b
				; CHECK-NEXT: ret <vscale x 4 x float> %1
				%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
				%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
				ret <vscale x 4 x float> %2
				}

				declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
				paulwalker-armUnsubmitted Not Done Reply Inline Actions Do you mind moving this negative test so it appears after the three positive ones? paulwalker-arm: Do you mind moving this negative test so it appears after the three positive ones?
				jolanta.jensenAuthorUnsubmitted Done Reply Inline Actions Fixed. jolanta.jensen: Fixed.
				; fsub.u on f64 vectors under ptrue(i32 31), this time without the `fast`
				; flag on the call: the expected output is a plain `fsub` with no
				; fast-math flags.
				define <vscale x 2 x double> @replace_fsub_u_intrinsic_no_fast_flag(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
				; CHECK-LABEL: @replace_fsub_u_intrinsic_no_fast_flag
				; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b
				; CHECK-NEXT: ret <vscale x 2 x double> %1
				%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
				%2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
				ret <vscale x 2 x double> %2
				}

				; Negative case: the predicate comes from ptrue(i32 5) rather than the
				; ptrue(i32 31) used by the other tests in this file, and the expected
				; output leaves both the ptrue and the fsub.u call in place unchanged.
				define <vscale x 2 x double> @no_replace_on_non_ptrue_all_u(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
				; CHECK-LABEL: @no_replace_on_non_ptrue_all_u
				; CHECK-NEXT: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
				; CHECK-NEXT: %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
				; CHECK-NEXT: ret <vscale x 2 x double> %2
				%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
				%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
				ret <vscale x 2 x double> %2
				}

	attributes #0 = { "target-features"="+sve" }			attributes #0 = { "target-features"="+sve" }

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul_u-idempotency.ll

This file was added.

				; RUN: opt -S -passes=instcombine < %s \| FileCheck %s

				target triple = "aarch64-unknown-linux-gnu"

				mgabkaUnsubmitted Not Done Reply Inline Actions the tests in this file do not match the description of the commit message, I think that is an optimization we want to cover separately, @paulwalker-arm what is your opinion? mgabka: the tests in this file do not match the description of the commit message, I think that is an…
				paulwalker-armUnsubmitted Not Done Reply Inline Actions @mgabka: This patch is about extending existing combines to also cover their equivalent `_u` intrinsics. The majority relate to replacing intrinsic calls with IR instructions but `instCombineSVEVectorMul` contains more combines. I think it would be messier to try to artificially avoid these combines so am happy for this patch to include them, assuming they're genuinely applicable? with this patch adding the relevant tests. paulwalker-arm: @mgabka: This patch is about extending existing combines to also cover their equivalent `_u`…
				jolanta.jensenAuthorUnsubmitted Done Reply Inline Actions It looks like this test and sve-intrinsic-fma-binops.ll cover all paths taken in instCombineSVEVectorMul. jolanta.jensen: It looks like this test and sve-intrinsic-fma-binops.ll cover all paths taken in…
				; Idempotent fmuls_u -- should compile to just a ret.
				; f16 case: %a is multiplied by an unpredicated dup.x splat of 1.0 passed as
				; the last operand of fmul.u; the expected output returns %a directly.
				define <vscale x 8 x half> @idempotent_fmul_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
				; CHECK-LABEL: @idempotent_fmul_u_f16(
				; CHECK-NEXT: ret <vscale x 8 x half> [[A:%.*]]
				;
				%1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0)
				%2 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %1)
				ret <vscale x 8 x half> %2
				}

				; f32 case: %a is multiplied by an unpredicated dup.x splat of 1.0 passed as
				; the last operand of fmul.u; the expected output returns %a directly.
				define <vscale x 4 x float> @idempotent_fmul_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
				; CHECK-LABEL: @idempotent_fmul_u_f32(
				; CHECK-NEXT: ret <vscale x 4 x float> [[A:%.*]]
				;
				%1 = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 1.0)
				%2 = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %1)
				ret <vscale x 4 x float> %2
				}

				; f64 case: %a is multiplied by an unpredicated dup.x splat of 1.0 passed as
				; the last operand of fmul.u; the expected output returns %a directly.
				define <vscale x 2 x double> @idempotent_fmul_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
				; CHECK-LABEL: @idempotent_fmul_u_f64(
				; CHECK-NEXT: ret <vscale x 2 x double> [[A:%.*]]
				;
				%1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0)
				%2 = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %1)
				ret <vscale x 2 x double> %2
				}

				; The unit splat is passed as the first data operand of fmul.u and %a as the
				; second. The expected output still contains the fmul.u call, with the dup
				; shown as a constant splat of 1.0 in that first data operand position.
				define <vscale x 2 x double> @idempotent_fmul_u_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
				; CHECK-LABEL: @idempotent_fmul_u_different_argument_order(
				; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> [[A:%.*]])
				; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
				;
				%1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0)
				; Different argument order to the above tests.
				%2 = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %1, <vscale x 2 x double> %a)
				ret <vscale x 2 x double> %2
				}

				; The unit value comes from a predicated dup (undef passthru) that uses the
				; same predicate %pg as the fmul.u; the expected output returns %a directly.
				define <vscale x 8 x half> @idempotent_fmul_u_with_predicated_dup(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
				; CHECK-LABEL: @idempotent_fmul_u_with_predicated_dup(
				; CHECK-NEXT: ret <vscale x 8 x half> [[A:%.*]]
				;
				%1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, half 1.0)
				%2 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %1)
				ret <vscale x 8 x half> %2
				}

				; Both data operands of fmul.u are dup.x splats of 1.0 and %a is unused; the
				; expected output returns a constant splat (half 0xH3C00) with no call left.
				define <vscale x 8 x half> @idempotent_fmul_u_two_dups(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
				; Edge case -- make sure that the case where we're fmultiplying two dups
				; together is sane.
				; CHECK-LABEL: @idempotent_fmul_u_two_dups(
				; CHECK-NEXT: ret <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH3C00, i64 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer)
				;
				%1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0)
				%2 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0)
				%3 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %1, <vscale x 8 x half> %2)
				ret <vscale x 8 x half> %3
				}

				; Non-idempotent fmuls_u -- we don't expect these to be optimised out.
				; f16 case: the multiplier is a dup.x splat of 2.0, so the expected output
				; keeps the fmul.u call with the dup shown as a constant splat (0xH4000).
				define <vscale x 8 x half> @non_idempotent_fmul_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
				; CHECK-LABEL: @non_idempotent_fmul_u_f16(
				; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH4000, i64 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer))
				; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
				;
				%1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 2.0)
				%2 = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %1)
				ret <vscale x 8 x half> %2
				}

				define <vscale x 4 x float> @non_idempotent_fmul_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
				; The multiplier is a splat of 2.0, not 1.0, so the fmul.u call is expected to
				; remain, with the dup.x folded into a constant splat operand.
				; CHECK-LABEL: @non_idempotent_fmul_u_f32(
				; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer))
				; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
				;
				%1 = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 2.0)
				%2 = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %1)
				ret <vscale x 4 x float> %2
				}

				define <vscale x 2 x double> @non_idempotent_fmul_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
				; The multiplier is a splat of 2.0, not 1.0, so the fmul.u call is expected to
				; remain, with the dup.x folded into a constant splat operand.
				; CHECK-LABEL: @non_idempotent_fmul_u_f64(
				; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 2.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer))
				; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
				;
				%1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 2.0)
				%2 = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %1)
				ret <vscale x 2 x double> %2
				}

				define <vscale x 2 x double> @non_idempotent_fmul_u_with_predicated_dup(<vscale x 2 x i1> %pg1, <vscale x 2 x i1> %pg2, <vscale x 2 x double> %a) #0 {
				; Different predicates: the dup of 1.0 is governed by %pg1 while the fmul.u is
				; governed by %pg2, so both calls are expected to remain in the output.
				; CHECK-LABEL: @non_idempotent_fmul_u_with_predicated_dup(
				; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG1:%.*]], double 1.000000e+00)
				; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[PG2:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[TMP1]])
				; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
				;
				%1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg1, double 1.0)
				%2 = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> %pg2, <vscale x 2 x double> %a, <vscale x 2 x double> %1)
				ret <vscale x 2 x double> %2
				}

				; Intrinsic under test: fmul.u takes a predicate and two vector operands.
				declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
				declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
				declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

				; Unpredicated dups: a single scalar operand.
				declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
				declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
				declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)

				; Predicated dups: a vector operand, a predicate and a scalar.
				declare <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half)
				declare <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)

				; Attribute group #0, referenced by every function definition in this test.
				attributes #0 = { "target-features"="+sve" }

This is an archive of the discontinued LLVM Phabricator instance.

[SVE ACLE] Extend IR combines for fmul, fsub, fadd to cover _u variants
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 527801

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul_u-idempotency.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SVE ACLE] Extend IR combines for fmul, fsub, fadd to cover _u variants
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 527801

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul_u-idempotency.ll

[SVE ACLE] Extend IR combines for fmul, fsub, fadd to cover _u variants
ClosedPublic