This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/AArch64/
-
Target/
-
AArch64/
-
AArch64TargetTransformInfo.cpp
-
test/Transforms/InstCombine/AArch64/
-
Transforms/
-
InstCombine/
-
AArch64/
-
neon-min-max-intrinsics.ll

Differential D125234

[AArch64] Remove redundant f{min,max}nm intrinsics.
ClosedPublic

Authored by fhahn on May 9 2022, 7:31 AM.

Download Raw Diff

Details

Reviewers

aemerson
t.p.northover
dmgreen

Commits

rG17a73992dd8b: [AArch64] Remove redundant f{min,max}nm intrinsics.

Summary

The patch extends AArch64TTIImpl::instCombineIntrinsic to simplify
llvm.aarch64.neon.f{min,max}nm(a, a) -> a.

This helps with simplifying code written using the ACLE, e.g.
see https://godbolt.org/z/jYxsoc89c

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

fhahn created this revision. May 9 2022, 7:31 AM

Herald added a project: Restricted Project. · View Herald Transcript · May 9 2022, 7:31 AM

Herald added subscribers: hiraditya, kristof.beyls. · View Herald Transcript

fhahn requested review of this revision. May 9 2022, 7:31 AM

Herald added a project: Restricted Project. · View Herald Transcript · May 9 2022, 7:31 AM

Harbormaster completed remote builds in B163483: Diff 428081. May 9 2022, 7:59 AM

Sounds good to me.

Do we need aarch64_neon_fmaxnm? Or is it equivalent to llvm.fmaxnum? We convert it late in the backend, maybe we should be doing that in the frontend instead, and removing the need for aarch64_neon_fmaxnm entirely. It would allow a lot more optimizations like this without the need to implement them all specifically.

This revision is now accepted and ready to land. May 10 2022, 3:02 AM

In D125234#3502984, @dmgreen wrote:

Sounds good to me.

Do we need aarch64_neon_fmaxnm? Or is it equivalent to llvm.fmaxnum? We convert it late in the backend, maybe we should be doing that in the frontend instead, and removing the need for aarch64_neon_fmaxnm entirely. It would allow a lot more optimizations like this without the need to implement them all specifically.

I *think* they behave differently with respect to signaling NaNs. IIUC llvm.maxnum always returns quiet NaNs:

‘llvm.maxnum.*’ Intrinsic

Follows the IEEE-754 semantics for maxNum except for the handling of signaling NaNs. This matches the behavior of libm’s fmax.

If either operand is a NaN, returns the other non-NaN operand. Returns NaN only if both operands are NaN. The returned NaN is always quiet. If the operands compare equal, returns a value that compares equal to both operands. This means that fmax(+/-0.0, +/-0.0) could return either -0.0 or 0.0.

AArch64's FMAXNM description says "NaNs are handled according to the IEEE 754-2008 standard", so it seems like it would handle signaling NaNs, but I may be wrong. Perhaps @scanon knows more.

This revision was landed with ongoing or failed builds. May 10 2022, 12:00 PM

Closed by commit rG17a73992dd8b: [AArch64] Remove redundant f{min,max}nm intrinsics. (authored by fhahn). · Explain Why

This revision was automatically updated to reflect the committed changes.

fhahn added a commit: rG17a73992dd8b: [AArch64] Remove redundant f{min,max}nm intrinsics..

Revision Contents

Path

Size

llvm/

lib/

Target/

AArch64/

AArch64TargetTransformInfo.cpp

13 lines

test/

Transforms/

InstCombine/

AArch64/

neon-min-max-intrinsics.ll

18 lines

Diff 428457

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Show First 20 Lines • Show All 1,213 Lines • ▼ Show 20 Lines	if (Divisor.isNegatedPowerOf2()) {
auto NEG = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_neg,		auto NEG = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_neg,
{ASRD->getType()}, {ASRD, Pred, ASRD});		{ASRD->getType()}, {ASRD, Pred, ASRD});
return IC.replaceInstUsesWith(II, NEG);		return IC.replaceInstUsesWith(II, NEG);
}		}

return None;		return None;
}		}

		// Fold a two-operand min/max intrinsic call whose operands are identical.
		//
		// If both argument operands of II are the very same Value, every use of II
		// is replaced with that operand (f{min,max}nm(a, a) -> a). Otherwise None is
		// returned, signalling that no simplification was performed.
		static Optional<Instruction *> instCombineMaxMinNM(InstCombiner &IC,
		IntrinsicInst &II) {
		Value *A = II.getArgOperand(0);
		Value *B = II.getArgOperand(1);
		// Pointer comparison: true only when both operands are the same Value.
		if (A == B)
		return IC.replaceInstUsesWith(II, A);

		return None;
		}

Optional<Instruction *>		Optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,		AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {		IntrinsicInst &II) const {
Intrinsic::ID IID = II.getIntrinsicID();		Intrinsic::ID IID = II.getIntrinsicID();
switch (IID) {		switch (IID) {
default:		default:
break;		break;
		case Intrinsic::aarch64_neon_fmaxnm:
		case Intrinsic::aarch64_neon_fminnm:
		return instCombineMaxMinNM(IC, II);
case Intrinsic::aarch64_sve_convert_from_svbool:		case Intrinsic::aarch64_sve_convert_from_svbool:
return instCombineConvertFromSVBool(IC, II);		return instCombineConvertFromSVBool(IC, II);
case Intrinsic::aarch64_sve_dup:		case Intrinsic::aarch64_sve_dup:
return instCombineSVEDup(IC, II);		return instCombineSVEDup(IC, II);
case Intrinsic::aarch64_sve_dup_x:		case Intrinsic::aarch64_sve_dup_x:
return instCombineSVEDupX(IC, II);		return instCombineSVEDupX(IC, II);
case Intrinsic::aarch64_sve_cmpne:		case Intrinsic::aarch64_sve_cmpne:
case Intrinsic::aarch64_sve_cmpne_wide:		case Intrinsic::aarch64_sve_cmpne_wide:
▲ Show 20 Lines • Show All 1,614 Lines • Show Last 20 Lines

llvm/test/Transforms/InstCombine/AArch64/neon-min-max-intrinsics.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -passes=instcombine -mtriple=arm64-apple-ios -S %s \| FileCheck %s			; RUN: opt -passes=instcombine -mtriple=arm64-apple-ios -S %s \| FileCheck %s

	declare <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half>, <4 x half>)			declare <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half>, <4 x half>)
	declare <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float>, <4 x float>)			declare <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float>, <4 x float>)
	declare <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double>, <2 x double>)			declare <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double>, <2 x double>)

	define <4 x half> @fmaxnm_v4f16_same_args(<4 x half> %a) {			define <4 x half> @fmaxnm_v4f16_same_args(<4 x half> %a) {
	; CHECK-LABEL: @fmaxnm_v4f16_same_args(			; CHECK-LABEL: @fmaxnm_v4f16_same_args(
	; CHECK-NEXT: [[R:%.]] = call <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half> [[A:%.]], <4 x half> [[A]])			; CHECK-NEXT: ret <4 x half> [[A:%.*]]
	; CHECK-NEXT: ret <4 x half> [[R]]
	;			;
	%r = call <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half> %a, <4 x half> %a)			%r = call <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half> %a, <4 x half> %a)
	ret <4 x half> %r			ret <4 x half> %r
	}			}

	define <4 x half> @fmaxnm_v4f16_different_args(<4 x half> %a, <4 x half> %b) {			define <4 x half> @fmaxnm_v4f16_different_args(<4 x half> %a, <4 x half> %b) {
	; CHECK-LABEL: @fmaxnm_v4f16_different_args(			; CHECK-LABEL: @fmaxnm_v4f16_different_args(
	; CHECK-NEXT: [[R:%.]] = call <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half> [[A:%.]], <4 x half> [[B:%.*]])			; CHECK-NEXT: [[R:%.]] = call <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half> [[A:%.]], <4 x half> [[B:%.*]])
	; CHECK-NEXT: ret <4 x half> [[R]]			; CHECK-NEXT: ret <4 x half> [[R]]
	;			;
	%r = call <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half> %a, <4 x half> %b)			%r = call <4 x half> @llvm.aarch64.neon.fmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
	ret <4 x half> %r			ret <4 x half> %r
	}			}

	define <4 x float> @fmaxnm_v4f32_same_args(<4 x float> %a) {			define <4 x float> @fmaxnm_v4f32_same_args(<4 x float> %a) {
	; CHECK-LABEL: @fmaxnm_v4f32_same_args(			; CHECK-LABEL: @fmaxnm_v4f32_same_args(
	; CHECK-NEXT: [[R:%.]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> [[A:%.]], <4 x float> [[A]])			; CHECK-NEXT: ret <4 x float> [[A:%.*]]
	; CHECK-NEXT: ret <4 x float> [[R]]
	;			;
	%r = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %a)			%r = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %a)
	ret <4 x float> %r			ret <4 x float> %r
	}			}

	define <4 x float> @fmaxnm_v4f32_different_args(<4 x float> %a, <4 x float> %b) {			define <4 x float> @fmaxnm_v4f32_different_args(<4 x float> %a, <4 x float> %b) {
	; CHECK-LABEL: @fmaxnm_v4f32_different_args(			; CHECK-LABEL: @fmaxnm_v4f32_different_args(
	; CHECK-NEXT: [[R:%.]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> [[A:%.]], <4 x float> [[B:%.*]])			; CHECK-NEXT: [[R:%.]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> [[A:%.]], <4 x float> [[B:%.*]])
	; CHECK-NEXT: ret <4 x float> [[R]]			; CHECK-NEXT: ret <4 x float> [[R]]
	;			;
	%r = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b)			%r = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b)
	ret <4 x float> %r			ret <4 x float> %r
	}			}

	define <2 x double> @fmaxnm_v2f64_same_args(<2 x double> %a) {			define <2 x double> @fmaxnm_v2f64_same_args(<2 x double> %a) {
	; CHECK-LABEL: @fmaxnm_v2f64_same_args(			; CHECK-LABEL: @fmaxnm_v2f64_same_args(
	; CHECK-NEXT: [[R:%.]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> [[A:%.]], <2 x double> [[A]])			; CHECK-NEXT: ret <2 x double> [[A:%.*]]
	; CHECK-NEXT: ret <2 x double> [[R]]
	;			;
	%r = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %a)			%r = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %a)
	ret <2 x double> %r			ret <2 x double> %r
	}			}

	define <2 x double> @fmaxnm_v2f64_different_args(<2 x double> %a, <2 x double> %b) {			define <2 x double> @fmaxnm_v2f64_different_args(<2 x double> %a, <2 x double> %b) {
	; CHECK-LABEL: @fmaxnm_v2f64_different_args(			; CHECK-LABEL: @fmaxnm_v2f64_different_args(
	; CHECK-NEXT: [[R:%.]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> [[A:%.]], <2 x double> [[B:%.*]])			; CHECK-NEXT: [[R:%.]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> [[A:%.]], <2 x double> [[B:%.*]])
	; CHECK-NEXT: ret <2 x double> [[R]]			; CHECK-NEXT: ret <2 x double> [[R]]
	;			;
	%r = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b)			%r = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b)
	ret <2 x double> %r			ret <2 x double> %r
	}			}

	declare <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half>, <4 x half>)			declare <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half>, <4 x half>)
	declare <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float>, <4 x float>)			declare <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float>, <4 x float>)
	declare <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double>, <2 x double>)			declare <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double>, <2 x double>)

	define <4 x half> @fminnm_v4f16_same_args(<4 x half> %a) {			define <4 x half> @fminnm_v4f16_same_args(<4 x half> %a) {
	; CHECK-LABEL: @fminnm_v4f16_same_args(			; CHECK-LABEL: @fminnm_v4f16_same_args(
	; CHECK-NEXT: [[R:%.]] = call <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half> [[A:%.]], <4 x half> [[A]])			; CHECK-NEXT: ret <4 x half> [[A:%.*]]
	; CHECK-NEXT: ret <4 x half> [[R]]
	;			;
	%r = call <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half> %a, <4 x half> %a)			%r = call <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half> %a, <4 x half> %a)
	ret <4 x half> %r			ret <4 x half> %r
	}			}

	define <4 x half> @fminnm_v4f16_different_args(<4 x half> %a, <4 x half> %b) {			define <4 x half> @fminnm_v4f16_different_args(<4 x half> %a, <4 x half> %b) {
	; CHECK-LABEL: @fminnm_v4f16_different_args(			; CHECK-LABEL: @fminnm_v4f16_different_args(
	; CHECK-NEXT: [[R:%.]] = call <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half> [[A:%.]], <4 x half> [[B:%.*]])			; CHECK-NEXT: [[R:%.]] = call <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half> [[A:%.]], <4 x half> [[B:%.*]])
	; CHECK-NEXT: ret <4 x half> [[R]]			; CHECK-NEXT: ret <4 x half> [[R]]
	;			;
	%r = call <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half> %a, <4 x half> %b)			%r = call <4 x half> @llvm.aarch64.neon.fminnm.v4f16(<4 x half> %a, <4 x half> %b)
	ret <4 x half> %r			ret <4 x half> %r
	}			}

	define <4 x float> @fminnm_v4f32_same_args(<4 x float> %a) {			define <4 x float> @fminnm_v4f32_same_args(<4 x float> %a) {
	; CHECK-LABEL: @fminnm_v4f32_same_args(			; CHECK-LABEL: @fminnm_v4f32_same_args(
	; CHECK-NEXT: [[R:%.]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> [[A:%.]], <4 x float> [[A]])			; CHECK-NEXT: ret <4 x float> [[A:%.*]]
	; CHECK-NEXT: ret <4 x float> [[R]]
	;			;
	%r = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %a)			%r = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %a)
	ret <4 x float> %r			ret <4 x float> %r
	}			}

	define <4 x float> @fminnm_v4f32_different_args(<4 x float> %a, <4 x float> %b) {			define <4 x float> @fminnm_v4f32_different_args(<4 x float> %a, <4 x float> %b) {
	; CHECK-LABEL: @fminnm_v4f32_different_args(			; CHECK-LABEL: @fminnm_v4f32_different_args(
	; CHECK-NEXT: [[R:%.]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> [[A:%.]], <4 x float> [[B:%.*]])			; CHECK-NEXT: [[R:%.]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> [[A:%.]], <4 x float> [[B:%.*]])
	; CHECK-NEXT: ret <4 x float> [[R]]			; CHECK-NEXT: ret <4 x float> [[R]]
	;			;
	%r = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b)			%r = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b)
	ret <4 x float> %r			ret <4 x float> %r
	}			}

	define <2 x double> @fminnm_v2f64_same_args(<2 x double> %a) {			define <2 x double> @fminnm_v2f64_same_args(<2 x double> %a) {
	; CHECK-LABEL: @fminnm_v2f64_same_args(			; CHECK-LABEL: @fminnm_v2f64_same_args(
	; CHECK-NEXT: [[R:%.]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> [[A:%.]], <2 x double> [[A]])			; CHECK-NEXT: ret <2 x double> [[A:%.*]]
	; CHECK-NEXT: ret <2 x double> [[R]]
	;			;
	%r = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %a)			%r = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %a)
	ret <2 x double> %r			ret <2 x double> %r
	}			}

	define <2 x double> @fminnm_v2f64_different_args(<2 x double> %a, <2 x double> %b) {			define <2 x double> @fminnm_v2f64_different_args(<2 x double> %a, <2 x double> %b) {
	; CHECK-LABEL: @fminnm_v2f64_different_args(			; CHECK-LABEL: @fminnm_v2f64_different_args(
	; CHECK-NEXT: [[R:%.]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> [[A:%.]], <2 x double> [[B:%.*]])			; CHECK-NEXT: [[R:%.]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> [[A:%.]], <2 x double> [[B:%.*]])
	; CHECK-NEXT: ret <2 x double> [[R]]			; CHECK-NEXT: ret <2 x double> [[R]]
	;			;
	%r = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b)			%r = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b)
	ret <2 x double> %r			ret <2 x double> %r
	}			}