Diff 363455

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 960 Lines • ▼ Show 20 Lines	private:
SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
		SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
▲ Show 20 Lines • Show All 153 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,034 Lines • ▼ Show 20 Lines	if (Subtarget->hasNEON()) {
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);		setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);		setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);		setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);		setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);		setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);		setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);		setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);		setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
		for (auto VT : {MVT::v1i64, MVT::v2i64}) {
		dmgreen (Unsubmitted, Done): Can these go into a loop, looping over the UMAX/UMIN/...?
		setOperationAction(ISD::UMAX, VT, Custom);
		setOperationAction(ISD::SMAX, VT, Custom);
		setOperationAction(ISD::UMIN, VT, Custom);
		setOperationAction(ISD::SMIN, VT, Custom);
		}

// AArch64 doesn't have MUL.2d:		// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);		setOperationAction(ISD::MUL, MVT::v2i64, Expand);
// Custom handling for some quad-vector types to detect MULL.		// Custom handling for some quad-vector types to detect MULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);		setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);		setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);		setOperationAction(ISD::MUL, MVT::v2i64, Custom);

▲ Show 20 Lines • Show All 3,768 Lines • ▼ Show 20 Lines	case ISD::SPLAT_VECTOR:
return LowerSPLAT_VECTOR(Op, DAG);		return LowerSPLAT_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:		case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);		return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:		case ISD::INSERT_SUBVECTOR:
return LowerINSERT_SUBVECTOR(Op, DAG);		return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:		case ISD::SDIV:
case ISD::UDIV:		case ISD::UDIV:
return LowerDIV(Op, DAG);		return LowerDIV(Op, DAG);
case ISD::SMIN:		case ISD::SMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
/*OverrideNEON=*/true);
case ISD::UMIN:		case ISD::UMIN:
		dmgreen (Unsubmitted, Done): These can use fallthroughs to the next case, if they all call LowerMinMax in the same way.
		Rin (Author, Unsubmitted, Done): Good point, I'll change them.
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
/*OverrideNEON=*/true);
case ISD::SMAX:		case ISD::SMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
/*OverrideNEON=*/true);
case ISD::UMAX:		case ISD::UMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,		return LowerMinMax(Op, DAG);
/*OverrideNEON=*/true);
case ISD::SRA:		case ISD::SRA:
case ISD::SRL:		case ISD::SRL:
case ISD::SHL:		case ISD::SHL:
return LowerVectorSRA_SRL_SHL(Op, DAG);		return LowerVectorSRA_SRL_SHL(Op, DAG);
case ISD::SHL_PARTS:		case ISD::SHL_PARTS:
case ISD::SRL_PARTS:		case ISD::SRL_PARTS:
case ISD::SRA_PARTS:		case ISD::SRA_PARTS:
return LowerShiftParts(Op, DAG);		return LowerShiftParts(Op, DAG);
▲ Show 20 Lines • Show All 2,279 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
assert(VT.isScalableVector() ||		assert(VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));		useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));

SDLoc DL(Op);		SDLoc DL(Op);
SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));		SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);		return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
}		}

		SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
		SelectionDAG &DAG) const {

		EVT VT = Op.getValueType();
		SDLoc DL(Op);
		unsigned Opcode = Op.getOpcode();
		ISD::CondCode CC;
		switch (Opcode) {
		default:
		llvm_unreachable("Wrong instruction");
		case ISD::SMAX:
		CC = ISD::SETGT;
		break;
		case ISD::SMIN:
		CC = ISD::SETLT;
		break;
		case ISD::UMAX:
		CC = ISD::SETUGT;
		break;
		case ISD::UMIN:
		CC = ISD::SETULT;
		break;
		}

		if (VT.isScalableVector() ||
		useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
		switch (Opcode) {
		dmgreen (Unsubmitted, Done): Is this VT check needed?
		Rin (Author, Unsubmitted, Done): > Is this VT check needed? — Nope, I'll get rid of it.
		david-arm (Unsubmitted, Done): I think you can move this if block to the start of the function, i.e. `unsigned Opcode = Op.getOpcode(); if (...) { switch (Opcode) { ... } }`
		default:
		david-arm (Unsubmitted, Done): I think you might be able to just do this instead: `if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {`?
		david-arm (Unsubmitted, Done): Hi @Rin, apologies, please ignore my previous comment about changing the if statement. I see now we also want scalable vector types to fall into the block too!
		Rin (Author, Unsubmitted, Done): No worries :)
		llvm_unreachable("Wrong instruction");
		case ISD::SMAX:
		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
		/*OverrideNEON=*/true);
		case ISD::SMIN:
		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
		/*OverrideNEON=*/true);
		case ISD::UMAX:
		return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
		/*OverrideNEON=*/true);
		case ISD::UMIN:
		return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
		/*OverrideNEON=*/true);
		}
		}

		SDValue Op0 = Op.getOperand(0);
		SDValue Op1 = Op.getOperand(1);
		SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
		return DAG.getSelect(DL, VT, Cond, Op0, Op1);
		efriedma (Unsubmitted, Not Done): If I'm understanding correctly, if we mark both ISD::SMAX and ISD::VSELECT as "Expand", vector legalization decides to unroll it. So we mark ISD::SMAX as "Custom", then explicitly lower to a VSELECT to get the code we want. This seems kind of silly, given VSELECT is equivalent to AArch64ISD::BSP. For the sake of making changes to target-independent code easier, maybe we should consider marking ISD::VSELECT "Custom"? Or add a target hook to indicate whether the operation is cheap?
		dmgreen (Unsubmitted, Not Done): Yes, we didn't mention that anywhere here. There are three ways we thought of fixing this: making vselect custom/legal, adding a target hook for the expand code, or custom lowering the type. Because it was only a single type, custom lowering seemed like the simplest route forward. The target hook felt messy for one type on one operation, and custom lowering vselect could be a much larger change - one that would easily cause regressions if a lot of other transforms were not added. I guess this turned into more code than I expected, with it being mixed up with SVE lowering. Perhaps custom lowering vselect would be better in the long run if someone were to optimize the BSPs in all the cases it would need, but this patch seems fine to me for the problem it is solving.
		efriedma (Unsubmitted, Not Done): Okay. I'm not really concerned about this patch on its own, just thinking about what we might want to do in the future.
		}

SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,		SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
EVT VT = Op.getValueType();		EVT VT = Op.getValueType();

if (VT.isScalableVector() ||		if (VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))		useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,		return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
true);		true);
▲ Show 20 Lines • Show All 11,600 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Show First 20 Lines • Show All 213 Lines • ▼ Show 20 Lines	AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;		return TTI::PSK_Software;
}		}

InstructionCost		InstructionCost
AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,		AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {		TTI::TargetCostKind CostKind) {
auto *RetTy = ICA.getReturnType();		auto *RetTy = ICA.getReturnType();
switch (ICA.getID()) {		switch (ICA.getID()) {
case Intrinsic::umin:		case Intrinsic::umin:
case Intrinsic::umax: {		case Intrinsic::umax:
		dmgreen (Unsubmitted, Done): This code can be removed now; it can fall through to the smin/smax cases.
		Rin (Author, Unsubmitted, Done): Yeah, I was wondering about that. I'll remove it.
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
// umin(x,y) -> sub(x,usubsat(x,y))
// umax(x,y) -> add(x,usubsat(y,x))
if (LT.second == MVT::v2i64)
return LT.first * 2;
LLVM_FALLTHROUGH;
}
case Intrinsic::smin:		case Intrinsic::smin:
case Intrinsic::smax: {		case Intrinsic::smax: {
static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,		static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
MVT::v8i16, MVT::v2i32, MVT::v4i32};		MVT::v8i16, MVT::v2i32, MVT::v4i32};
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);		auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
		// v2i64 types get converted to cmp+bif hence the cost of 2
		dmgreen (Unsubmitted, Done): Maybe add a comment about it being converted to a cmp+bif?
		Rin (Author, Unsubmitted, Done): Will do.
		if (LT.second == MVT::v2i64)
		return LT.first * 2;
if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))		if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
return LT.first;		return LT.first;
break;		break;
}		}
case Intrinsic::sadd_sat:		case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat:		case Intrinsic::ssub_sat:
case Intrinsic::uadd_sat:		case Intrinsic::uadd_sat:
case Intrinsic::usub_sat: {		case Intrinsic::usub_sat: {
▲ Show 20 Lines • Show All 1,959 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AArch64/min-max.ll

	; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
				dmgreen (Unsubmitted, Not Done): I was updating this file the other day. I think if you rebase, the changes may disappear.
				Rin (Author, Unsubmitted, Done): Ah, fair, I'll rebase then.
				dmgreen (Unsubmitted, Done): It turns out I had not updated that test yet... It should now have the changes I mentioned, if you rebase the patch. Can you make sure the instructions have a proper cost now too? They should be 2, I think. There is already some code in AArch64TTIImpl::getIntrinsicInstrCost for it.
				david-arm (Unsubmitted, Done): nit: It might be easier to review the patch if you first introduced an NFC change to update the tests using utils/update_analyze_test_checks.py?
	; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16			; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
	; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+fullfp16 -cost-model -cost-kind=throughput -analyze | FileCheck %s --check-prefixes=CHECK,CHECK-F16			; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+fullfp16 -cost-model -cost-kind=throughput -analyze | FileCheck %s --check-prefixes=CHECK,CHECK-F16

	define void @reduce_umin() {			define void @reduce_umin() {
	; CHECK-LABEL: 'reduce_umin'			; CHECK-LABEL: 'reduce_umin'
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.umin.v1i8(<1 x i8> undef, <1 x i8> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.umin.v1i8(<1 x i8> undef, <1 x i8> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.umin.v3i8(<3 x i8> undef, <3 x i8> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.umin.v3i8(<3 x i8> undef, <3 x i8> undef)
	▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V4i64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	%V1i8 = call <1 x i8> @llvm.smin.v1i8(<1 x i8> undef, <1 x i8> undef)			%V1i8 = call <1 x i8> @llvm.smin.v1i8(<1 x i8> undef, <1 x i8> undef)
	%V3i8 = call <3 x i8> @llvm.smin.v3i8(<3 x i8> undef, <3 x i8> undef)			%V3i8 = call <3 x i8> @llvm.smin.v3i8(<3 x i8> undef, <3 x i8> undef)
	%V4i8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)			%V4i8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)
	%V8i8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)			%V8i8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)
	%V16i8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)			%V16i8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
	%V32i8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)			%V32i8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef)
	Show All 21 Lines
	; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V4i64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)			; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
	; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	%V1i8 = call <1 x i8> @llvm.smax.v1i8(<1 x i8> undef, <1 x i8> undef)			%V1i8 = call <1 x i8> @llvm.smax.v1i8(<1 x i8> undef, <1 x i8> undef)
	%V3i8 = call <3 x i8> @llvm.smax.v3i8(<3 x i8> undef, <3 x i8> undef)			%V3i8 = call <3 x i8> @llvm.smax.v3i8(<3 x i8> undef, <3 x i8> undef)
	%V4i8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)			%V4i8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)
	%V8i8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)			%V8i8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)
	%V16i8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)			%V16i8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
	%V32i8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)			%V32i8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef)
	▲ Show 20 Lines • Show All 277 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/min-max.ll

Show First 20 Lines • Show All 179 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
ret void		ret void
}		}

declare <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone		declare <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone

define <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {		define <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-ISEL-LABEL: smax1i64:		; CHECK-ISEL-LABEL: smax1i64:
; CHECK-ISEL: // %bb.0:		; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1		; CHECK-ISEL-NEXT: cmgt d2, d0, d1
; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-ISEL-NEXT: fmov x8, d1
; CHECK-ISEL-NEXT: fmov x9, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, gt
; CHECK-ISEL-NEXT: fmov d0, x8
; CHECK-ISEL-NEXT: ret		; CHECK-ISEL-NEXT: ret
;		;
; CHECK-GLOBAL-LABEL: smax1i64:		; CHECK-GLOBAL-LABEL: smax1i64:
; CHECK-GLOBAL: // %bb.0:		; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: fmov x8, d0		; CHECK-GLOBAL-NEXT: fmov x8, d0
; CHECK-GLOBAL-NEXT: fmov x9, d1		; CHECK-GLOBAL-NEXT: fmov x9, d1
; CHECK-GLOBAL-NEXT: cmp x8, x9		; CHECK-GLOBAL-NEXT: cmp x8, x9
; CHECK-GLOBAL-NEXT: fcsel d0, d0, d1, gt		; CHECK-GLOBAL-NEXT: fcsel d0, d0, d1, gt
; CHECK-GLOBAL-NEXT: ret		; CHECK-GLOBAL-NEXT: ret
%c = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b)		%c = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b)
ret <1 x i64> %c		ret <1 x i64> %c
}		}

declare <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone		declare <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone

define <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) {		define <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: smax2i64:		; CHECK-ISEL-LABEL: smax2i64:
; CHECK-ISEL: // %bb.0:		; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: mov x8, v1.d[1]		; CHECK-ISEL-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-ISEL-NEXT: mov x9, v0.d[1]		; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: fmov x10, d1
; CHECK-ISEL-NEXT: fmov x11, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, gt
; CHECK-ISEL-NEXT: cmp x11, x10
; CHECK-ISEL-NEXT: csel x9, x11, x10, gt
; CHECK-ISEL-NEXT: fmov d0, x9
; CHECK-ISEL-NEXT: mov v0.d[1], x8
; CHECK-ISEL-NEXT: ret		; CHECK-ISEL-NEXT: ret
;		;
; CHECK-GLOBAL-LABEL: smax2i64:		; CHECK-GLOBAL-LABEL: smax2i64:
; CHECK-GLOBAL: // %bb.0:		; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmgt v2.2d, v0.2d, v1.2d		; CHECK-GLOBAL-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63		; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63		; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b		; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GLOBAL-NEXT: ret		; CHECK-GLOBAL-NEXT: ret
%c = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b)		%c = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %c		ret <2 x i64> %c
}		}

declare <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone		declare <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone

define void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {		define void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-ISEL-LABEL: smax4i64:		; CHECK-ISEL-LABEL: smax4i64:
; CHECK-ISEL: // %bb.0:		; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: mov x8, v2.d[1]		; CHECK-ISEL-NEXT: cmgt v4.2d, v0.2d, v2.2d
; CHECK-ISEL-NEXT: mov x9, v0.d[1]		; CHECK-ISEL-NEXT: cmgt v5.2d, v1.2d, v3.2d
; CHECK-ISEL-NEXT: fmov x10, d2		; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-ISEL-NEXT: fmov x11, d0		; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, gt
; CHECK-ISEL-NEXT: cmp x11, x10
; CHECK-ISEL-NEXT: mov x9, v3.d[1]
; CHECK-ISEL-NEXT: csel x10, x11, x10, gt
; CHECK-ISEL-NEXT: mov x11, v1.d[1]
; CHECK-ISEL-NEXT: cmp x11, x9
; CHECK-ISEL-NEXT: fmov d0, x10
; CHECK-ISEL-NEXT: fmov x10, d3
; CHECK-ISEL-NEXT: csel x9, x11, x9, gt
; CHECK-ISEL-NEXT: fmov x11, d1
; CHECK-ISEL-NEXT: cmp x11, x10
; CHECK-ISEL-NEXT: csel x10, x11, x10, gt
; CHECK-ISEL-NEXT: fmov d1, x10
; CHECK-ISEL-NEXT: mov v0.d[1], x8
; CHECK-ISEL-NEXT: mov v1.d[1], x9
; CHECK-ISEL-NEXT: stp q0, q1, [x0]		; CHECK-ISEL-NEXT: stp q0, q1, [x0]
; CHECK-ISEL-NEXT: ret		; CHECK-ISEL-NEXT: ret
;		;
; CHECK-GLOBAL-LABEL: smax4i64:		; CHECK-GLOBAL-LABEL: smax4i64:
; CHECK-GLOBAL: // %bb.0:		; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmgt v4.2d, v0.2d, v2.2d		; CHECK-GLOBAL-NEXT: cmgt v4.2d, v0.2d, v2.2d
; CHECK-GLOBAL-NEXT: cmgt v5.2d, v1.2d, v3.2d		; CHECK-GLOBAL-NEXT: cmgt v5.2d, v1.2d, v3.2d
; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63		; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63
▲ Show 20 Lines • Show All 183 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
ret void		ret void
}		}

declare <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone		declare <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone

define <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {		define <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-ISEL-LABEL: umax1i64:		; CHECK-ISEL-LABEL: umax1i64:
; CHECK-ISEL: // %bb.0:		; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1		; CHECK-ISEL-NEXT: cmhi d2, d0, d1
; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-ISEL-NEXT: fmov x8, d1
; CHECK-ISEL-NEXT: fmov x9, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, hi
; CHECK-ISEL-NEXT: fmov d0, x8
; CHECK-ISEL-NEXT: ret		; CHECK-ISEL-NEXT: ret
;		;
; CHECK-GLOBAL-LABEL: umax1i64:		; CHECK-GLOBAL-LABEL: umax1i64:
; CHECK-GLOBAL: // %bb.0:		; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: fmov x8, d0		; CHECK-GLOBAL-NEXT: fmov x8, d0
; CHECK-GLOBAL-NEXT: fmov x9, d1		; CHECK-GLOBAL-NEXT: fmov x9, d1
; CHECK-GLOBAL-NEXT: cmp x8, x9		; CHECK-GLOBAL-NEXT: cmp x8, x9
; CHECK-GLOBAL-NEXT: fcsel d0, d0, d1, hi		; CHECK-GLOBAL-NEXT: fcsel d0, d0, d1, hi
; CHECK-GLOBAL-NEXT: ret		; CHECK-GLOBAL-NEXT: ret
%c = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b)		%c = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b)
ret <1 x i64> %c		ret <1 x i64> %c
}		}

declare <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone		declare <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone

define <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) {		define <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: umax2i64:		; CHECK-ISEL-LABEL: umax2i64:
; CHECK-ISEL: // %bb.0:		; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: uqsub v1.2d, v1.2d, v0.2d		; CHECK-ISEL-NEXT: cmhi v2.2d, v0.2d, v1.2d
; CHECK-ISEL-NEXT: add v0.2d, v0.2d, v1.2d		; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret		; CHECK-ISEL-NEXT: ret
;		;
; CHECK-GLOBAL-LABEL: umax2i64:		; CHECK-GLOBAL-LABEL: umax2i64:
; CHECK-GLOBAL: // %bb.0:		; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmhi v2.2d, v0.2d, v1.2d		; CHECK-GLOBAL-NEXT: cmhi v2.2d, v0.2d, v1.2d
; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63		; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63		; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b		; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GLOBAL-NEXT: ret		; CHECK-GLOBAL-NEXT: ret
%c = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)		%c = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %c		ret <2 x i64> %c
}		}

declare <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone		declare <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone

define void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {		define void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-ISEL-LABEL: umax4i64:		; CHECK-ISEL-LABEL: umax4i64:
; CHECK-ISEL: // %bb.0:		; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: uqsub v2.2d, v2.2d, v0.2d		; CHECK-ISEL-NEXT: cmhi v4.2d, v0.2d, v2.2d
; CHECK-ISEL-NEXT: uqsub v3.2d, v3.2d, v1.2d		; CHECK-ISEL-NEXT: cmhi v5.2d, v1.2d, v3.2d
; CHECK-ISEL-NEXT: add v0.2d, v0.2d, v2.2d		; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-ISEL-NEXT: add v1.2d, v1.2d, v3.2d		; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-ISEL-NEXT: stp q0, q1, [x0]		; CHECK-ISEL-NEXT: stp q0, q1, [x0]
; CHECK-ISEL-NEXT: ret		; CHECK-ISEL-NEXT: ret
;		;
; CHECK-GLOBAL-LABEL: umax4i64:		; CHECK-GLOBAL-LABEL: umax4i64:
; CHECK-GLOBAL: // %bb.0:		; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmhi v4.2d, v0.2d, v2.2d		; CHECK-GLOBAL-NEXT: cmhi v4.2d, v0.2d, v2.2d
; CHECK-GLOBAL-NEXT: cmhi v5.2d, v1.2d, v3.2d		; CHECK-GLOBAL-NEXT: cmhi v5.2d, v1.2d, v3.2d
; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63		; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63
▲ Show 20 Lines • Show All 183 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
ret void		ret void
}		}

declare <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone		declare <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone

define <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {		define <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-ISEL-LABEL: smin1i64:		; CHECK-ISEL-LABEL: smin1i64:
; CHECK-ISEL: // %bb.0:		; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1		; CHECK-ISEL-NEXT: cmgt d2, d1, d0
; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-ISEL-NEXT: fmov x8, d1
; CHECK-ISEL-NEXT: fmov x9, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, lt
; CHECK-ISEL-NEXT: fmov d0, x8
; CHECK-ISEL-NEXT: ret		; CHECK-ISEL-NEXT: ret
;		;
; CHECK-GLOBAL-LABEL: smin1i64:		; CHECK-GLOBAL-LABEL: smin1i64:
; CHECK-GLOBAL: // %bb.0:		; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: fmov x8, d0		; CHECK-GLOBAL-NEXT: fmov x8, d0
; CHECK-GLOBAL-NEXT: fmov x9, d1		; CHECK-GLOBAL-NEXT: fmov x9, d1
; CHECK-GLOBAL-NEXT: cmp x8, x9		; CHECK-GLOBAL-NEXT: cmp x8, x9
; CHECK-GLOBAL-NEXT: fcsel d0, d0, d1, lt		; CHECK-GLOBAL-NEXT: fcsel d0, d0, d1, lt
; CHECK-GLOBAL-NEXT: ret		; CHECK-GLOBAL-NEXT: ret
%c = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b)		%c = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b)
ret <1 x i64> %c		ret <1 x i64> %c
}		}

declare <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone		declare <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone

define <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) {		define <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: smin2i64:		; CHECK-ISEL-LABEL: smin2i64:
; CHECK-ISEL: // %bb.0:		; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: mov x8, v1.d[1]		; CHECK-ISEL-NEXT: cmgt v2.2d, v1.2d, v0.2d
; CHECK-ISEL-NEXT: mov x9, v0.d[1]		; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: fmov x10, d1
; CHECK-ISEL-NEXT: fmov x11, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, lt
; CHECK-ISEL-NEXT: cmp x11, x10
; CHECK-ISEL-NEXT: csel x9, x11, x10, lt
; CHECK-ISEL-NEXT: fmov d0, x9
; CHECK-ISEL-NEXT: mov v0.d[1], x8
; CHECK-ISEL-NEXT: ret		; CHECK-ISEL-NEXT: ret
;		;
; CHECK-GLOBAL-LABEL: smin2i64:		; CHECK-GLOBAL-LABEL: smin2i64:
; CHECK-GLOBAL: // %bb.0:		; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmgt v2.2d, v1.2d, v0.2d		; CHECK-GLOBAL-NEXT: cmgt v2.2d, v1.2d, v0.2d
; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63		; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63		; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b		; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GLOBAL-NEXT: ret		; CHECK-GLOBAL-NEXT: ret
%c = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b)		%c = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %c		ret <2 x i64> %c
}		}

declare <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone		declare <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone

define void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {		define void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-ISEL-LABEL: smin4i64:		; CHECK-ISEL-LABEL: smin4i64:
; CHECK-ISEL: // %bb.0:		; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: mov x8, v2.d[1]		; CHECK-ISEL-NEXT: cmgt v4.2d, v2.2d, v0.2d
; CHECK-ISEL-NEXT: mov x9, v0.d[1]		; CHECK-ISEL-NEXT: cmgt v5.2d, v3.2d, v1.2d
; CHECK-ISEL-NEXT: fmov x10, d2		; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-ISEL-NEXT: fmov x11, d0		; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, lt
; CHECK-ISEL-NEXT: cmp x11, x10
; CHECK-ISEL-NEXT: mov x9, v3.d[1]
; CHECK-ISEL-NEXT: csel x10, x11, x10, lt
; CHECK-ISEL-NEXT: mov x11, v1.d[1]
; CHECK-ISEL-NEXT: cmp x11, x9
; CHECK-ISEL-NEXT: fmov d0, x10
; CHECK-ISEL-NEXT: fmov x10, d3
; CHECK-ISEL-NEXT: csel x9, x11, x9, lt
; CHECK-ISEL-NEXT: fmov x11, d1
; CHECK-ISEL-NEXT: cmp x11, x10
; CHECK-ISEL-NEXT: csel x10, x11, x10, lt
; CHECK-ISEL-NEXT: fmov d1, x10
; CHECK-ISEL-NEXT: mov v0.d[1], x8
; CHECK-ISEL-NEXT: mov v1.d[1], x9
; CHECK-ISEL-NEXT: stp q0, q1, [x0]		; CHECK-ISEL-NEXT: stp q0, q1, [x0]
; CHECK-ISEL-NEXT: ret		; CHECK-ISEL-NEXT: ret
;		;
; CHECK-GLOBAL-LABEL: smin4i64:		; CHECK-GLOBAL-LABEL: smin4i64:
; CHECK-GLOBAL: // %bb.0:		; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmgt v4.2d, v2.2d, v0.2d		; CHECK-GLOBAL-NEXT: cmgt v4.2d, v2.2d, v0.2d
; CHECK-GLOBAL-NEXT: cmgt v5.2d, v3.2d, v1.2d		; CHECK-GLOBAL-NEXT: cmgt v5.2d, v3.2d, v1.2d
; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63		; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63
▲ Show 20 Lines • Show All 183 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
ret void		ret void
}		}

declare <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone		declare <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone

define <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {		define <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-ISEL-LABEL: umin1i64:		; CHECK-ISEL-LABEL: umin1i64:
; CHECK-ISEL: // %bb.0:		; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1		; CHECK-ISEL-NEXT: cmhi d2, d1, d0
; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-ISEL-NEXT: fmov x8, d1
; CHECK-ISEL-NEXT: fmov x9, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, lo
; CHECK-ISEL-NEXT: fmov d0, x8
; CHECK-ISEL-NEXT: ret		; CHECK-ISEL-NEXT: ret
;		;
; CHECK-GLOBAL-LABEL: umin1i64:		; CHECK-GLOBAL-LABEL: umin1i64:
; CHECK-GLOBAL: // %bb.0:		; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: fmov x8, d0		; CHECK-GLOBAL-NEXT: fmov x8, d0
; CHECK-GLOBAL-NEXT: fmov x9, d1		; CHECK-GLOBAL-NEXT: fmov x9, d1
; CHECK-GLOBAL-NEXT: cmp x8, x9		; CHECK-GLOBAL-NEXT: cmp x8, x9
; CHECK-GLOBAL-NEXT: fcsel d0, d0, d1, lo		; CHECK-GLOBAL-NEXT: fcsel d0, d0, d1, lo
; CHECK-GLOBAL-NEXT: ret		; CHECK-GLOBAL-NEXT: ret
%c = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b)		%c = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b)
ret <1 x i64> %c		ret <1 x i64> %c
}		}

declare <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone		declare <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone

define <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) {		define <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: umin2i64:		; CHECK-ISEL-LABEL: umin2i64:
; CHECK-ISEL: // %bb.0:		; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: uqsub v1.2d, v0.2d, v1.2d		; CHECK-ISEL-NEXT: cmhi v2.2d, v1.2d, v0.2d
; CHECK-ISEL-NEXT: sub v0.2d, v0.2d, v1.2d		; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret		; CHECK-ISEL-NEXT: ret
;		;
; CHECK-GLOBAL-LABEL: umin2i64:		; CHECK-GLOBAL-LABEL: umin2i64:
; CHECK-GLOBAL: // %bb.0:		; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmhi v2.2d, v1.2d, v0.2d		; CHECK-GLOBAL-NEXT: cmhi v2.2d, v1.2d, v0.2d
; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63		; CHECK-GLOBAL-NEXT: shl v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63		; CHECK-GLOBAL-NEXT: sshr v2.2d, v2.2d, #63
; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b		; CHECK-GLOBAL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GLOBAL-NEXT: ret		; CHECK-GLOBAL-NEXT: ret
%c = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b)		%c = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %c		ret <2 x i64> %c
}		}

declare <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone		declare <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone

define void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {		define void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-ISEL-LABEL: umin4i64:		; CHECK-ISEL-LABEL: umin4i64:
; CHECK-ISEL: // %bb.0:		; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: uqsub v2.2d, v0.2d, v2.2d		; CHECK-ISEL-NEXT: cmhi v4.2d, v2.2d, v0.2d
; CHECK-ISEL-NEXT: uqsub v3.2d, v1.2d, v3.2d		; CHECK-ISEL-NEXT: cmhi v5.2d, v3.2d, v1.2d
; CHECK-ISEL-NEXT: sub v0.2d, v0.2d, v2.2d		; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-ISEL-NEXT: sub v1.2d, v1.2d, v3.2d		; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-ISEL-NEXT: stp q0, q1, [x0]		; CHECK-ISEL-NEXT: stp q0, q1, [x0]
; CHECK-ISEL-NEXT: ret		; CHECK-ISEL-NEXT: ret
;		;
; CHECK-GLOBAL-LABEL: umin4i64:		; CHECK-GLOBAL-LABEL: umin4i64:
; CHECK-GLOBAL: // %bb.0:		; CHECK-GLOBAL: // %bb.0:
; CHECK-GLOBAL-NEXT: cmhi v4.2d, v2.2d, v0.2d		; CHECK-GLOBAL-NEXT: cmhi v4.2d, v2.2d, v0.2d
; CHECK-GLOBAL-NEXT: cmhi v5.2d, v3.2d, v1.2d		; CHECK-GLOBAL-NEXT: cmhi v5.2d, v3.2d, v1.2d
; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63		; CHECK-GLOBAL-NEXT: shl v4.2d, v4.2d, #63
Show All 11 Lines

llvm/test/CodeGen/AArch64/minmax.ll

Show First 20 Lines • Show All 154 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
%t1 = icmp ugt <2 x i64> %a, %b		%t1 = icmp ugt <2 x i64> %a, %b
%t2 = select <2 x i1> %t1, <2 x i64> %a, <2 x i64> %b		%t2 = select <2 x i1> %t1, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %t2		ret <2 x i64> %t2
}		}

define <4 x i64> @t15(<4 x i64> %a, <4 x i64> %b) {		define <4 x i64> @t15(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: t15:		; CHECK-LABEL: t15:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: cmhs v4.2d, v3.2d, v1.2d		; CHECK-NEXT: cmhi v4.2d, v2.2d, v0.2d
; CHECK-NEXT: cmhs v5.2d, v2.2d, v0.2d		; CHECK-NEXT: cmhi v5.2d, v3.2d, v1.2d
; CHECK-NEXT: bif v0.16b, v2.16b, v5.16b		; CHECK-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-NEXT: bif v1.16b, v3.16b, v4.16b		; CHECK-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%t1 = icmp ule <4 x i64> %a, %b		%t1 = icmp ule <4 x i64> %a, %b
%t2 = select <4 x i1> %t1, <4 x i64> %a, <4 x i64> %b		%t2 = select <4 x i1> %t1, <4 x i64> %a, <4 x i64> %b
ret <4 x i64> %t2		ret <4 x i64> %t2
}		}

llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

Show First 20 Lines • Show All 81 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
%b = call i128 @llvm.vector.reduce.umax.v1i128(<1 x i128> %a)		%b = call i128 @llvm.vector.reduce.umax.v1i128(<1 x i128> %a)
ret i128 %b		ret i128 %b
}		}

; No i64 vector support for UMAX.		; No i64 vector support for UMAX.
define i64 @test_v2i64(<2 x i64> %a) nounwind {		define i64 @test_v2i64(<2 x i64> %a) nounwind {
; CHECK-LABEL: test_v2i64:		; CHECK-LABEL: test_v2i64:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, v0.d[1]		; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: fmov x9, d0		; CHECK-NEXT: cmhi d2, d0, d1
; CHECK-NEXT: cmp x9, x8		; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: csel x0, x9, x8, hi		; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
		david-arm: Hi @Rin, at first glance this code looks like it might be worse than before? I realise the instruction count is the same, but I wonder if the cost of 'ext' might be higher?
		dmgreen: Why would the cost of an ext be higher?
		david-arm: @dmgreen it might not be! I just wondered if ext was worse than the mov, that's all.
%b = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)		%b = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
ret i64 %b		ret i64 %b
}		}

define i8 @test_v3i8(<3 x i8> %a) nounwind {		define i8 @test_v3i8(<3 x i8> %a) nounwind {
; CHECK-LABEL: test_v3i8:		; CHECK-LABEL: test_v3i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: movi d0, #0000000000000000		; CHECK-NEXT: movi d0, #0000000000000000
▲ Show 20 Lines • Show All 93 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Optimise min/max lowering in ISel
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 363455

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/test/Analysis/CostModel/AArch64/min-max.ll

llvm/test/CodeGen/AArch64/min-max.ll

llvm/test/CodeGen/AArch64/minmax.ll

llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Optimise min/max lowering in ISel
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 363455

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/test/Analysis/CostModel/AArch64/min-max.ll

llvm/test/CodeGen/AArch64/min-max.ll

llvm/test/CodeGen/AArch64/minmax.ll

llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

[AArch64] Optimise min/max lowering in ISel
ClosedPublic