Diff 389977

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 98 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
SUB_PRED,		SUB_PRED,
UDIV_PRED,		UDIV_PRED,
UMAX_PRED,		UMAX_PRED,
UMIN_PRED,		UMIN_PRED,

// Unpredicated vector instructions		// Unpredicated vector instructions
BIC,		BIC,

		SRAD_MERGE_OP1,

// Predicated instructions with the result of inactive lanes provided by the		// Predicated instructions with the result of inactive lanes provided by the
// last operand.		// last operand.
FABS_MERGE_PASSTHRU,		FABS_MERGE_PASSTHRU,
FCEIL_MERGE_PASSTHRU,		FCEIL_MERGE_PASSTHRU,
FFLOOR_MERGE_PASSTHRU,		FFLOOR_MERGE_PASSTHRU,
FNEARBYINT_MERGE_PASSTHRU,		FNEARBYINT_MERGE_PASSTHRU,
FNEG_MERGE_PASSTHRU,		FNEG_MERGE_PASSTHRU,
FRECPX_MERGE_PASSTHRU,		FRECPX_MERGE_PASSTHRU,
▲ Show 20 Lines • Show All 1,029 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,943 Lines • ▼ Show 20 Lines	case AArch64ISD::FIRST_NUMBER:
MAKE_CASE(AArch64ISD::SMAX_PRED)		MAKE_CASE(AArch64ISD::SMAX_PRED)
MAKE_CASE(AArch64ISD::SMIN_PRED)		MAKE_CASE(AArch64ISD::SMIN_PRED)
MAKE_CASE(AArch64ISD::SRA_PRED)		MAKE_CASE(AArch64ISD::SRA_PRED)
MAKE_CASE(AArch64ISD::SRL_PRED)		MAKE_CASE(AArch64ISD::SRL_PRED)
MAKE_CASE(AArch64ISD::SUB_PRED)		MAKE_CASE(AArch64ISD::SUB_PRED)
MAKE_CASE(AArch64ISD::UDIV_PRED)		MAKE_CASE(AArch64ISD::UDIV_PRED)
MAKE_CASE(AArch64ISD::UMAX_PRED)		MAKE_CASE(AArch64ISD::UMAX_PRED)
MAKE_CASE(AArch64ISD::UMIN_PRED)		MAKE_CASE(AArch64ISD::UMIN_PRED)
		MAKE_CASE(AArch64ISD::SRAD_MERGE_OP1)
MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)		MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)		MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)		MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)		MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)		MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)		MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)		MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)		MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
▲ Show 20 Lines • Show All 350 Lines • ▼ Show 20 Lines
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

// Forward declarations of SVE fixed length lowering helpers		// Forward declarations of SVE fixed length lowering helpers
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);		static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);		static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);		static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFixedMaskToScalableVector(SDValue Mask,		static SDValue convertFixedMaskToScalableVector(SDValue Mask,
SelectionDAG &DAG);		SelectionDAG &DAG);
		static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
		EVT VT);

/// isZerosVector - Check whether SDNode N is a zero-filled vector.		/// isZerosVector - Check whether SDNode N is a zero-filled vector.
static bool isZerosVector(const SDNode *N) {		static bool isZerosVector(const SDNode *N) {
// Look through a bit convert.		// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)		while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();		N = N->getOperand(0).getNode();

if (ISD::isConstantSplatVectorAllZeros(N))		if (ISD::isConstantSplatVectorAllZeros(N))
▲ Show 20 Lines • Show All 8,631 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,

// This will be matched by custom code during ISelDAGToDAG.		// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())		if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
return Op;		return Op;

return SDValue();		return SDValue();
}		}

		/// Test whether \p Op splats a constant that is a power of two, or the
		/// negation of a power of two.
		///
		/// Only AArch64ISD::DUP, ISD::SPLAT_VECTOR and ISD::BUILD_VECTOR nodes are
		/// considered. A BUILD_VECTOR must additionally be accepted by
		/// isAllConstantBuildVector (defined elsewhere; presumably it requires every
		/// element to be the same constant -- confirm against its definition). The
		/// other two opcodes must have a ConstantSDNode as operand 0.
		///
		/// \param Op       The candidate divisor node.
		/// \param SplatVal On success, the positive power of two (the magnitude of
		///                 the splatted constant). On failure its value is
		///                 unspecified; it may already have been overwritten.
		/// \param Negated  On success, true when the splatted constant was the
		///                 negation of \p SplatVal. Left as true when the function
		///                 fails after reading the constant.
		/// \returns true if the splatted constant is +/-(2^k).
		static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated) {
		  const unsigned Opcode = Op.getOpcode();
		  const bool IsBuildVector = Opcode == ISD::BUILD_VECTOR;

		  // Reject anything that is not one of the three supported splat forms.
		  if (!IsBuildVector && Opcode != AArch64ISD::DUP &&
		      Opcode != ISD::SPLAT_VECTOR)
		    return false;

		  // Each form has its own "is this really a constant splat" test.
		  if (IsBuildVector) {
		    if (!isAllConstantBuildVector(Op, SplatVal))
		      return false;
		  } else if (!isa<ConstantSDNode>(Op->getOperand(0))) {
		    return false;
		  }

		  // Operand 0 carries the splatted constant. For every element type other
		  // than i64 the value is narrowed to 32 bits and sign-extended back to 64
		  // so that a negative constant keeps its sign.
		  SplatVal = Op->getConstantOperandVal(0);
		  const bool Is64BitElt = Op.getValueType().getVectorElementType() == MVT::i64;
		  if (!Is64BitElt)
		    SplatVal = static_cast<int32_t>(SplatVal);

		  // Positive power of two: report it unchanged.
		  if (isPowerOf2_64(SplatVal)) {
		    Negated = false;
		    return true;
		  }

		  // Otherwise try the two's-complement negation of the value.
		  Negated = true;
		  const uint64_t Magnitude = -SplatVal;
		  if (!isPowerOf2_64(Magnitude))
		    return false;

		  SplatVal = Magnitude;
		  return true;
		}

SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {		SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();		EVT VT = Op.getValueType();
		SDLoc dl(Op);

if (useSVEForFixedLengthVectorVT(VT, /OverrideNEON=/true))		if (useSVEForFixedLengthVectorVT(VT, /OverrideNEON=/true))
return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);		return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);

assert(VT.isScalableVector() && "Expected a scalable vector.");		assert(VT.isScalableVector() && "Expected a scalable vector.");

bool Signed = Op.getOpcode() == ISD::SDIV;		bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;		unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;

		bool Negated;
		uint64_t SplatVal;
		if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
		SDValue Pg = getPredicateForScalableVector(DAG, dl, VT);
		SDValue Res =
		DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0),
		DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32));
		if (Negated)
		Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);

		return Res;
		}

if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv2i64)		if (VT == MVT::nxv4i32 \|\| VT == MVT::nxv2i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode);		return LowerToPredicatedOp(Op, DAG, PredOpcode);

// SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit		// SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
// operations, and truncate the result.		// operations, and truncate the result.
EVT WidenedVT;		EVT WidenedVT;
if (VT == MVT::nxv16i8)		if (VT == MVT::nxv16i8)
WidenedVT = MVT::nxv8i16;		WidenedVT = MVT::nxv8i16;
else if (VT == MVT::nxv8i16)		else if (VT == MVT::nxv8i16)
WidenedVT = MVT::nxv4i32;		WidenedVT = MVT::nxv4i32;
else		else
llvm_unreachable("Unexpected Custom DIV operation");		llvm_unreachable("Unexpected Custom DIV operation");

SDLoc dl(Op);
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;		unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;		unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));		SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));		SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));		SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));		SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);		SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);		SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
▲ Show 20 Lines • Show All 1,975 Lines • ▼ Show 20 Lines
SDValue		SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,		AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,		SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {		SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();		AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))		if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV		return SDValue(N,0); // Lower SDIV as SDIV

// fold (sdiv X, pow2)
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);

		paulwalker-arm (unsubmitted, not done): This change is not needed anymore.
		// For scalable and fixed types, mark them as cheap so we can handle it much
		// later. This allows us to handle larger than legal types.
		if (VT.isScalableVector() \|\| Subtarget->useSVEForFixedLengthVectors())
		paulwalker-arm (unsubmitted, not done): This should just be `handle larger than legal types` as it applies to all vector types.
		return SDValue(N, 0);

		// fold (sdiv X, pow2)
if ((VT != MVT::i32 && VT != MVT::i64) \|\|		if ((VT != MVT::i32 && VT != MVT::i64) \|\|
!(Divisor.isPowerOf2() \|\| Divisor.isNegatedPowerOf2()))		!(Divisor.isPowerOf2() \|\| Divisor.isNegatedPowerOf2()))
return SDValue();		return SDValue();
		paulwalker-arm (unsubmitted, not done): Given this is a DAG combine, does this need to ensure `VT` is a legal type before it can safely emit an `AArch64ISD`-specific node? `useSVEForFixedLengthVectorVT` has probably got you covered for fixed-length types, but you're also handling scalable vector types here. Just a thought, but given the `isIntDivCheap` logic above I'm wondering if it is better to do likewise for vectors when SVE will be used, i.e. just say they're cheap and then have isel rules to lower them to `ASRD`. I believe this will mean you get free handling for the larger-than-legal types. That said, the one thing I'm not sure about is whether that makes the handling of `isNegatedPowerOf2` cases more awkward.

SDLoc DL(N);		SDLoc DL(N);
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
unsigned Lg2 = Divisor.countTrailingZeros();		unsigned Lg2 = Divisor.countTrailingZeros();
SDValue Zero = DAG.getConstant(0, DL, VT);		SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);		SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);

// Add (N0 < 0) ? Pow2 - 1 : 0;		// Add (N0 < 0) ? Pow2 - 1 : 0;
▲ Show 20 Lines • Show All 550 Lines • ▼ Show 20 Lines	if (N.getOpcode() == ISD::SHL)
FromHi = false;		FromHi = false;
else if (N.getOpcode() == ISD::SRL)		else if (N.getOpcode() == ISD::SRL)
FromHi = true;		FromHi = true;
else		else
return false;		return false;

if (!isa<ConstantSDNode>(N.getOperand(1)))		if (!isa<ConstantSDNode>(N.getOperand(1)))
return false;		return false;

		peterwaller-arm (unsubmitted, not done): Nit/suggestion: Please add a comment showing the form of the combine.
ShiftAmount = N->getConstantOperandVal(1);		ShiftAmount = N->getConstantOperandVal(1);
Src = N->getOperand(0);		Src = N->getOperand(0);
return true;		return true;
}		}

		efriedma (unsubmitted, not done): Do we really need to preserve the fact that the operation was originally an intrinsic?
		bsmith (author, unsubmitted, done): I believe so, yes; the semantics of the predicate are different between SDIV_PRED and the intrinsics, which is why we have different patterns to select these.
		efriedma (unsubmitted, not done): Oh, I see, the intrinsic has an extra operand.
/// EXTR instruction extracts a contiguous chunk of bits from two existing		/// EXTR instruction extracts a contiguous chunk of bits from two existing
/// registers viewed as a high/low pair. This function looks for the pattern:		/// registers viewed as a high/low pair. This function looks for the pattern:
/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it		/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
/// with an EXTR. Can't quite be done in TableGen because the two immediates		/// with an EXTR. Can't quite be done in TableGen because the two immediates
/// aren't independent.		/// aren't independent.
static SDValue tryCombineToEXTR(SDNode *N,		static SDValue tryCombineToEXTR(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {		TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;		SelectionDAG &DAG = DCI.DAG;
▲ Show 20 Lines • Show All 1,467 Lines • ▼ Show 20 Lines	static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_sve_umax:		case Intrinsic::aarch64_sve_umax:
return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);		return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
case Intrinsic::aarch64_sve_lsl:		case Intrinsic::aarch64_sve_lsl:
return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);		return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
case Intrinsic::aarch64_sve_lsr:		case Intrinsic::aarch64_sve_lsr:
return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);		return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
case Intrinsic::aarch64_sve_asr:		case Intrinsic::aarch64_sve_asr:
return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);		return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
case Intrinsic::aarch64_sve_fadd:		case Intrinsic::aarch64_sve_fadd:
return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);		return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
case Intrinsic::aarch64_sve_fsub:		case Intrinsic::aarch64_sve_fsub:
		paulwalker-arm (unsubmitted, not done): This doesn't look correct because `AArch64ISD::SRAD_PRED` implies inactive lanes are undef. Perhaps you meant to use `convertMergedOpToPredOp`?
return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);		return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
case Intrinsic::aarch64_sve_fmul:		case Intrinsic::aarch64_sve_fmul:
return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);		return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
case Intrinsic::aarch64_sve_add:		case Intrinsic::aarch64_sve_add:
return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);		return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
case Intrinsic::aarch64_sve_sub:		case Intrinsic::aarch64_sve_sub:
return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);		return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
case Intrinsic::aarch64_sve_subr:		case Intrinsic::aarch64_sve_subr:
Show All 21 Lines	case Intrinsic::aarch64_sve_sqsub_x:
return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),		return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));		N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_uqadd_x:		case Intrinsic::aarch64_sve_uqadd_x:
return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),		return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));		N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_uqsub_x:		case Intrinsic::aarch64_sve_uqsub_x:
return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),		return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));		N->getOperand(1), N->getOperand(2));
		case Intrinsic::aarch64_sve_asrd:
		return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
		N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmphs:		case Intrinsic::aarch64_sve_cmphs:
if (!N->getOperand(2).getValueType().isFloatingPoint())		if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),		return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),		N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUGE));		N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
break;		break;
case Intrinsic::aarch64_sve_cmphi:		case Intrinsic::aarch64_sve_cmphi:
if (!N->getOperand(2).getValueType().isFloatingPoint())		if (!N->getOperand(2).getValueType().isFloatingPoint())
▲ Show 20 Lines • Show All 3,602 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
SDValue Op, SelectionDAG &DAG) const {		SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);		SDLoc dl(Op);
EVT VT = Op.getValueType();		EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();		EVT EltVT = VT.getVectorElementType();

bool Signed = Op.getOpcode() == ISD::SDIV;		bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;		unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;

		bool Negated;
		uint64_t SplatVal;
		if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
		EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
		SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
		SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32);

		SDValue Pg = getPredicateForFixedLengthVector(DAG, dl, VT);
		SDValue Res = DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT, Pg, Op1, Op2);
		if (Negated)
		paulwalker-arm (unsubmitted, not done): Up to you, but I think having `SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0)); SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32)` is cleaner whilst also being more consistent with the `OP1` in the opcode's name.
		Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);

		return convertFromScalableVector(DAG, VT, Res);
		}

// Scalable vector i32/i64 DIV is supported.		// Scalable vector i32/i64 DIV is supported.
if (EltVT == MVT::i32 \|\| EltVT == MVT::i64)		if (EltVT == MVT::i32 \|\| EltVT == MVT::i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode, /OverrideNEON=/true);		return LowerToPredicatedOp(Op, DAG, PredOpcode, /OverrideNEON=/true);

// Scalable vector i8/i16 DIV is not supported. Promote it to i32.		// Scalable vector i8/i16 DIV is not supported. Promote it to i32.
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);		EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());		EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());		EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
▲ Show 20 Lines • Show All 688 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show First 20 Lines • Show All 193 Lines • ▼ Show 20 Lines
def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;		def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>;		def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>;
def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>;		def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>;
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;		def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;		def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;		def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
def AArch64umulh_p : SDNode<"AArch64ISD::MULHU_PRED", SDT_AArch64Arith>;		def AArch64umulh_p : SDNode<"AArch64ISD::MULHU_PRED", SDT_AArch64Arith>;

		// Type profile with one result and three operands, where the last operand
		// is an i32. Index 0 is the result; indices 1-3 are the operands:
		//   1: a vector with i1 elements (the lowering code in this change passes
		//      the governing predicate here),
		//   2: a vector of the same type as the result,
		//   3: an i32 (the lowering code passes the shift amount as a target
		//      constant).
		def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>,
		SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
		]>;

		// Selection DAG node for AArch64ISD::SRAD_MERGE_OP1, typed with the
		// SDT_AArch64Arith_Imm profile.
		def AArch64asrd_m1 : SDNode<"AArch64ISD::SRAD_MERGE_OP1", SDT_AArch64Arith_Imm>;

def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [		def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4>		SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4>
]>;		]>;

// Predicated operations with the result of inactive lanes provided by the last operand.		// Predicated operations with the result of inactive lanes provided by the last operand.
def AArch64clz_mt : SDNode<"AArch64ISD::CTLZ_MERGE_PASSTHRU", SDT_AArch64Arith>;		def AArch64clz_mt : SDNode<"AArch64ISD::CTLZ_MERGE_PASSTHRU", SDT_AArch64Arith>;
def AArch64cnt_mt : SDNode<"AArch64ISD::CTPOP_MERGE_PASSTHRU", SDT_AArch64Arith>;		def AArch64cnt_mt : SDNode<"AArch64ISD::CTPOP_MERGE_PASSTHRU", SDT_AArch64Arith>;
▲ Show 20 Lines • Show All 1,360 Lines • ▼ Show 20 Lines	let Predicates = [HasSVEorStreamingSVE] in {
defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr", int_aarch64_sve_asr_wide>;		defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr", int_aarch64_sve_asr_wide>;
defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr", int_aarch64_sve_lsr_wide>;		defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr", int_aarch64_sve_lsr_wide>;
defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl", int_aarch64_sve_lsl_wide>;		defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl", int_aarch64_sve_lsl_wide>;

// Predicated shifts		// Predicated shifts
defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>;		defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>;
defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>;		defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>;
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>;		defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>;
defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;		defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", AArch64asrd_m1>;
		paulwalker-arm (unsubmitted, not done): I don't think you intended this change, which is likely the result of the performIntrinsicCombine issue.

defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;		defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;		defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
defm LSL_ZPZI : sve_int_shift_pred_bhsd<AArch64lsl_p, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;		defm LSL_ZPZI : sve_int_shift_pred_bhsd<AArch64lsl_p, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
} // End HasSVEorStreamingSVE		} // End HasSVEorStreamingSVE

let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {		let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;		defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;		defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;		defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;		defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_m1>;
} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos		} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos

let Predicates = [HasSVEorStreamingSVE] in {		let Predicates = [HasSVEorStreamingSVE] in {
defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">;		defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">;
defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">;		defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">;
defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">;		defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">;
defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", /*isReverseInstr*/ 1>;		defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", /*isReverseInstr*/ 1>;
defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", /*isReverseInstr*/ 1>;		defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", /*isReverseInstr*/ 1>;
▲ Show 20 Lines • Show All 1,568 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
				; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK
				; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
				; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
				; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
				; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
				; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
				; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
				; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
				; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
				; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
				; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
				; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
				; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
				; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048

				target triple = "aarch64-unknown-linux-gnu"

				; <8 x i8> signed divide by a splat of 32 (2^5), operand passed and returned by value.
				; Every RUN configuration (CHECK prefix) expects a single predicated "asrd ..., #5"
				; on z0.b under an unqualified "ptrue p0.b".
				define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) #0 {
				; CHECK-LABEL: sdiv_v8i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
				; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
				; CHECK-NEXT: ret
				%res = sdiv <8 x i8> %op1, shufflevector (<8 x i8> insertelement (<8 x i8> poison, i8 32, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer)
				ret <8 x i8> %res
				}

				; <16 x i8> signed divide by a splat of 32 (2^5), operand passed and returned by value.
				; Every RUN configuration (CHECK prefix) expects a single predicated "asrd ..., #5"
				; on z0.b under an unqualified "ptrue p0.b".
				define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) #0 {
				; CHECK-LABEL: sdiv_v16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
				; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
				; CHECK-NEXT: ret
				%res = sdiv <16 x i8> %op1, shufflevector (<16 x i8> insertelement (<16 x i8> poison, i8 32, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer)
				ret <16 x i8> %res
				}

				; Loads <32 x i8> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; Every RUN configuration (CHECK prefix) expects ld1b / "asrd ..., #5" / st1b under a
				; "ptrue p0.b, vl32" predicate.
				define void @sdiv_v32i8(<32 x i8>* %a) #0 {
				; CHECK-LABEL: sdiv_v32i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl32
				; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
				; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
				; CHECK-NEXT: st1b { z0.b }, p0, [x0]
				; CHECK-NEXT: ret
				%op1 = load <32 x i8>, <32 x i8>* %a
				%res = sdiv <32 x i8> %op1, shufflevector (<32 x i8> insertelement (<32 x i8> poison, i8 32, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer)
				store <32 x i8> %res, <32 x i8>* %a
				ret void
				}

				; Loads <64 x i8> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; VBITS_EQ_256 expects the work split into two vl32 halves (two "asrd ..., #5", the
				; second half addressed via offset register x8 = 32); VBITS_GE_512 expects a single
				; vl64 load / asrd / store.
				define void @sdiv_v64i8(<64 x i8>* %a) #0 {
				; VBITS_EQ_256-LABEL: sdiv_v64i8:
				; VBITS_EQ_256: // %bb.0:
				; VBITS_EQ_256-NEXT: mov w8, #32
				; VBITS_EQ_256-NEXT: ptrue p0.b, vl32
				; VBITS_EQ_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
				; VBITS_EQ_256-NEXT: ld1b { z1.b }, p0/z, [x0]
				; VBITS_EQ_256-NEXT: asrd z0.b, p0/m, z0.b, #5
				; VBITS_EQ_256-NEXT: asrd z1.b, p0/m, z1.b, #5
				; VBITS_EQ_256-NEXT: st1b { z0.b }, p0, [x0, x8]
				; VBITS_EQ_256-NEXT: st1b { z1.b }, p0, [x0]
				; VBITS_EQ_256-NEXT: ret
				;
				; VBITS_GE_512-LABEL: sdiv_v64i8:
				; VBITS_GE_512: // %bb.0:
				; VBITS_GE_512-NEXT: ptrue p0.b, vl64
				; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
				; VBITS_GE_512-NEXT: asrd z0.b, p0/m, z0.b, #5
				; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
				; VBITS_GE_512-NEXT: ret
				%op1 = load <64 x i8>, <64 x i8>* %a
				%res = sdiv <64 x i8> %op1, shufflevector (<64 x i8> insertelement (<64 x i8> poison, i8 32, i32 0), <64 x i8> poison, <64 x i32> zeroinitializer)
				store <64 x i8> %res, <64 x i8>* %a
				ret void
				}

				; Loads <128 x i8> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; Only checked under the VBITS_GE_1024 prefix, which expects a single vl128
				; ld1b / "asrd ..., #5" / st1b.
				define void @sdiv_v128i8(<128 x i8>* %a) #0 {
				; VBITS_GE_1024-LABEL: sdiv_v128i8:
				; VBITS_GE_1024: // %bb.0:
				; VBITS_GE_1024-NEXT: ptrue p0.b, vl128
				; VBITS_GE_1024-NEXT: ld1b { z0.b }, p0/z, [x0]
				; VBITS_GE_1024-NEXT: asrd z0.b, p0/m, z0.b, #5
				; VBITS_GE_1024-NEXT: st1b { z0.b }, p0, [x0]
				; VBITS_GE_1024-NEXT: ret
				%op1 = load <128 x i8>, <128 x i8>* %a
				%res = sdiv <128 x i8> %op1, shufflevector (<128 x i8> insertelement (<128 x i8> poison, i8 32, i32 0), <128 x i8> poison, <128 x i32> zeroinitializer)
				store <128 x i8> %res, <128 x i8>* %a
				ret void
				}

				; Loads <256 x i8> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; Only checked under the VBITS_GE_2048 prefix, which expects a single vl256
				; ld1b / "asrd ..., #5" / st1b.
				define void @sdiv_v256i8(<256 x i8>* %a) #0 {
				; VBITS_GE_2048-LABEL: sdiv_v256i8:
				; VBITS_GE_2048: // %bb.0:
				; VBITS_GE_2048-NEXT: ptrue p0.b, vl256
				; VBITS_GE_2048-NEXT: ld1b { z0.b }, p0/z, [x0]
				; VBITS_GE_2048-NEXT: asrd z0.b, p0/m, z0.b, #5
				; VBITS_GE_2048-NEXT: st1b { z0.b }, p0, [x0]
				; VBITS_GE_2048-NEXT: ret
				%op1 = load <256 x i8>, <256 x i8>* %a
				%res = sdiv <256 x i8> %op1, shufflevector (<256 x i8> insertelement (<256 x i8> poison, i8 32, i32 0), <256 x i8> poison, <256 x i32> zeroinitializer)
				store <256 x i8> %res, <256 x i8>* %a
				ret void
				}

				; <4 x i16> signed divide by a splat of 32 (2^5), operand passed and returned by value.
				; Every RUN configuration (CHECK prefix) expects a single predicated "asrd ..., #5"
				; on z0.h under an unqualified "ptrue p0.h".
				define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) #0 {
				; CHECK-LABEL: sdiv_v4i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
				; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
				; CHECK-NEXT: ret
				%res = sdiv <4 x i16> %op1, shufflevector (<4 x i16> insertelement (<4 x i16> poison, i16 32, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer)
				ret <4 x i16> %res
				}

				; <8 x i16> signed divide by a splat of 32 (2^5), operand passed and returned by value.
				; Every RUN configuration (CHECK prefix) expects a single predicated "asrd ..., #5"
				; on z0.h under an unqualified "ptrue p0.h".
				define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) #0 {
				; CHECK-LABEL: sdiv_v8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
				; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
				; CHECK-NEXT: ret
				%res = sdiv <8 x i16> %op1, shufflevector (<8 x i16> insertelement (<8 x i16> poison, i16 32, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer)
				ret <8 x i16> %res
				}

				; Loads <16 x i16> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; Every RUN configuration (CHECK prefix) expects ld1h / "asrd ..., #5" / st1h under a
				; "ptrue p0.h, vl16" predicate.
				define void @sdiv_v16i16(<16 x i16>* %a) #0 {
				; CHECK-LABEL: sdiv_v16i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h, vl16
				; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
				; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
				; CHECK-NEXT: st1h { z0.h }, p0, [x0]
				; CHECK-NEXT: ret
				%op1 = load <16 x i16>, <16 x i16>* %a
				%res = sdiv <16 x i16> %op1, shufflevector (<16 x i16> insertelement (<16 x i16> poison, i16 32, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer)
				store <16 x i16> %res, <16 x i16>* %a
				ret void
				}

				; Loads <32 x i16> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; VBITS_EQ_256 expects the work split into two vl16 halves (two "asrd ..., #5", the
				; second half addressed via element index x8 = 16); VBITS_GE_512 expects a single
				; vl32 load / asrd / store.
				define void @sdiv_v32i16(<32 x i16>* %a) #0 {
				; VBITS_EQ_256-LABEL: sdiv_v32i16:
				; VBITS_EQ_256: // %bb.0:
				; VBITS_EQ_256-NEXT: mov x8, #16
				; VBITS_EQ_256-NEXT: ptrue p0.h, vl16
				; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
				; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0]
				; VBITS_EQ_256-NEXT: asrd z0.h, p0/m, z0.h, #5
				; VBITS_EQ_256-NEXT: asrd z1.h, p0/m, z1.h, #5
				; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
				; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
				; VBITS_EQ_256-NEXT: ret
				;
				; VBITS_GE_512-LABEL: sdiv_v32i16:
				; VBITS_GE_512: // %bb.0:
				; VBITS_GE_512-NEXT: ptrue p0.h, vl32
				; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
				; VBITS_GE_512-NEXT: asrd z0.h, p0/m, z0.h, #5
				; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
				; VBITS_GE_512-NEXT: ret
				%op1 = load <32 x i16>, <32 x i16>* %a
				%res = sdiv <32 x i16> %op1, shufflevector (<32 x i16> insertelement (<32 x i16> poison, i16 32, i32 0), <32 x i16> poison, <32 x i32> zeroinitializer)
				store <32 x i16> %res, <32 x i16>* %a
				ret void
				}

				; Loads <64 x i16> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; Only checked under the VBITS_GE_1024 prefix, which expects a single vl64
				; ld1h / "asrd ..., #5" / st1h.
				define void @sdiv_v64i16(<64 x i16>* %a) #0 {
				; VBITS_GE_1024-LABEL: sdiv_v64i16:
				; VBITS_GE_1024: // %bb.0:
				; VBITS_GE_1024-NEXT: ptrue p0.h, vl64
				; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
				; VBITS_GE_1024-NEXT: asrd z0.h, p0/m, z0.h, #5
				; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0]
				; VBITS_GE_1024-NEXT: ret
				%op1 = load <64 x i16>, <64 x i16>* %a
				%res = sdiv <64 x i16> %op1, shufflevector (<64 x i16> insertelement (<64 x i16> poison, i16 32, i32 0), <64 x i16> poison, <64 x i32> zeroinitializer)
				store <64 x i16> %res, <64 x i16>* %a
				ret void
				}

				; Loads <128 x i16> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; Only checked under the VBITS_GE_2048 prefix, which expects a single vl128
				; ld1h / "asrd ..., #5" / st1h.
				define void @sdiv_v128i16(<128 x i16>* %a) #0 {
				; VBITS_GE_2048-LABEL: sdiv_v128i16:
				; VBITS_GE_2048: // %bb.0:
				; VBITS_GE_2048-NEXT: ptrue p0.h, vl128
				; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0]
				; VBITS_GE_2048-NEXT: asrd z0.h, p0/m, z0.h, #5
				; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0]
				; VBITS_GE_2048-NEXT: ret
				%op1 = load <128 x i16>, <128 x i16>* %a
				%res = sdiv <128 x i16> %op1, shufflevector (<128 x i16> insertelement (<128 x i16> poison, i16 32, i32 0), <128 x i16> poison, <128 x i32> zeroinitializer)
				store <128 x i16> %res, <128 x i16>* %a
				ret void
				}

				; <2 x i32> signed divide by a splat of 32 (2^5), operand passed and returned by value.
				; Every RUN configuration (CHECK prefix) expects a single predicated "asrd ..., #5"
				; on z0.s under an unqualified "ptrue p0.s".
				define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) #0 {
				; CHECK-LABEL: sdiv_v2i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
				; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
				; CHECK-NEXT: ret
				%res = sdiv <2 x i32> %op1, shufflevector (<2 x i32> insertelement (<2 x i32> poison, i32 32, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer)
				ret <2 x i32> %res
				}

				; <4 x i32> signed divide by a splat of 32 (2^5), operand passed and returned by value.
				; Every RUN configuration (CHECK prefix) expects a single predicated "asrd ..., #5"
				; on z0.s under an unqualified "ptrue p0.s".
				define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) #0 {
				; CHECK-LABEL: sdiv_v4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
				; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
				; CHECK-NEXT: ret
				%res = sdiv <4 x i32> %op1, shufflevector (<4 x i32> insertelement (<4 x i32> poison, i32 32, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer)
				ret <4 x i32> %res
				}

				; Loads <8 x i32> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; Every RUN configuration (CHECK prefix) expects ld1w / "asrd ..., #5" / st1w under a
				; "ptrue p0.s, vl8" predicate.
				define void @sdiv_v8i32(<8 x i32>* %a) #0 {
				; CHECK-LABEL: sdiv_v8i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s, vl8
				; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
				; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
				; CHECK-NEXT: st1w { z0.s }, p0, [x0]
				; CHECK-NEXT: ret
				%op1 = load <8 x i32>, <8 x i32>* %a
				%res = sdiv <8 x i32> %op1, shufflevector (<8 x i32> insertelement (<8 x i32> poison, i32 32, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer)
				store <8 x i32> %res, <8 x i32>* %a
				ret void
				}

				; Loads <16 x i32> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; VBITS_EQ_256 expects the work split into two vl8 halves (two "asrd ..., #5", the
				; second half addressed via element index x8 = 8); VBITS_GE_512 expects a single
				; vl16 load / asrd / store.
				define void @sdiv_v16i32(<16 x i32>* %a) #0 {
				; VBITS_EQ_256-LABEL: sdiv_v16i32:
				; VBITS_EQ_256: // %bb.0:
				; VBITS_EQ_256-NEXT: mov x8, #8
				; VBITS_EQ_256-NEXT: ptrue p0.s, vl8
				; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
				; VBITS_EQ_256-NEXT: ld1w { z1.s }, p0/z, [x0]
				; VBITS_EQ_256-NEXT: asrd z0.s, p0/m, z0.s, #5
				; VBITS_EQ_256-NEXT: asrd z1.s, p0/m, z1.s, #5
				; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
				; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
				; VBITS_EQ_256-NEXT: ret
				;
				; VBITS_GE_512-LABEL: sdiv_v16i32:
				; VBITS_GE_512: // %bb.0:
				; VBITS_GE_512-NEXT: ptrue p0.s, vl16
				; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
				; VBITS_GE_512-NEXT: asrd z0.s, p0/m, z0.s, #5
				; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
				; VBITS_GE_512-NEXT: ret
				%op1 = load <16 x i32>, <16 x i32>* %a
				%res = sdiv <16 x i32> %op1, shufflevector (<16 x i32> insertelement (<16 x i32> poison, i32 32, i32 0), <16 x i32> poison, <16 x i32> zeroinitializer)
				store <16 x i32> %res, <16 x i32>* %a
				ret void
				}

				; Loads <32 x i32> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; Only checked under the VBITS_GE_1024 prefix, which expects a single vl32
				; ld1w / "asrd ..., #5" / st1w.
				define void @sdiv_v32i32(<32 x i32>* %a) #0 {
				; VBITS_GE_1024-LABEL: sdiv_v32i32:
				; VBITS_GE_1024: // %bb.0:
				; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
				; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
				; VBITS_GE_1024-NEXT: asrd z0.s, p0/m, z0.s, #5
				; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0]
				; VBITS_GE_1024-NEXT: ret
				%op1 = load <32 x i32>, <32 x i32>* %a
				%res = sdiv <32 x i32> %op1, shufflevector (<32 x i32> insertelement (<32 x i32> poison, i32 32, i32 0), <32 x i32> poison, <32 x i32> zeroinitializer)
				store <32 x i32> %res, <32 x i32>* %a
				ret void
				}

				; Loads <64 x i32> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; Only checked under the VBITS_GE_2048 prefix, which expects a single vl64
				; ld1w / "asrd ..., #5" / st1w.
				define void @sdiv_v64i32(<64 x i32>* %a) #0 {
				; VBITS_GE_2048-LABEL: sdiv_v64i32:
				; VBITS_GE_2048: // %bb.0:
				; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
				; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
				; VBITS_GE_2048-NEXT: asrd z0.s, p0/m, z0.s, #5
				; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
				; VBITS_GE_2048-NEXT: ret
				%op1 = load <64 x i32>, <64 x i32>* %a
				%res = sdiv <64 x i32> %op1, shufflevector (<64 x i32> insertelement (<64 x i32> poison, i32 32, i32 0), <64 x i32> poison, <64 x i32> zeroinitializer)
				store <64 x i32> %res, <64 x i32>* %a
				ret void
				}

				; <1 x i64> signed divide by a splat of 32 (2^5), operand passed and returned by value.
				; Every RUN configuration (CHECK prefix) expects a single predicated "asrd ..., #5"
				; on z0.d under an unqualified "ptrue p0.d".
				define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) #0 {
				; CHECK-LABEL: sdiv_v1i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
				; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
				; CHECK-NEXT: ret
				%res = sdiv <1 x i64> %op1, shufflevector (<1 x i64> insertelement (<1 x i64> poison, i64 32, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer)
				ret <1 x i64> %res
				}

				; Vector i64 sdiv are not legal for NEON so use SVE when available.
				; <2 x i64> signed divide by a splat of 32 (2^5), operand passed and returned by value.
				; Every RUN configuration (CHECK prefix) expects a single predicated "asrd ..., #5"
				; on z0.d under an unqualified "ptrue p0.d".
				define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) #0 {
				; CHECK-LABEL: sdiv_v2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
				; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
				; CHECK-NEXT: ret
				%res = sdiv <2 x i64> %op1, shufflevector (<2 x i64> insertelement (<2 x i64> poison, i64 32, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer)
				ret <2 x i64> %res
				}

				; Loads <4 x i64> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; Every RUN configuration (CHECK prefix) expects ld1d / "asrd ..., #5" / st1d under a
				; "ptrue p0.d, vl4" predicate.
				define void @sdiv_v4i64(<4 x i64>* %a) #0 {
				; CHECK-LABEL: sdiv_v4i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d, vl4
				; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
				; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
				; CHECK-NEXT: st1d { z0.d }, p0, [x0]
				; CHECK-NEXT: ret
				%op1 = load <4 x i64>, <4 x i64>* %a
				%res = sdiv <4 x i64> %op1, shufflevector (<4 x i64> insertelement (<4 x i64> poison, i64 32, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer)
				store <4 x i64> %res, <4 x i64>* %a
				ret void
				}

				; Loads <8 x i64> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; VBITS_EQ_256 expects the work split into two vl4 halves (two "asrd ..., #5", the
				; second half addressed via element index x8 = 4); VBITS_GE_512 expects a single
				; vl8 load / asrd / store.
				define void @sdiv_v8i64(<8 x i64>* %a) #0 {
				; VBITS_EQ_256-LABEL: sdiv_v8i64:
				; VBITS_EQ_256: // %bb.0:
				; VBITS_EQ_256-NEXT: mov x8, #4
				; VBITS_EQ_256-NEXT: ptrue p0.d, vl4
				; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
				; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x0]
				; VBITS_EQ_256-NEXT: asrd z0.d, p0/m, z0.d, #5
				; VBITS_EQ_256-NEXT: asrd z1.d, p0/m, z1.d, #5
				; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
				; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
				; VBITS_EQ_256-NEXT: ret
				;
				; VBITS_GE_512-LABEL: sdiv_v8i64:
				; VBITS_GE_512: // %bb.0:
				; VBITS_GE_512-NEXT: ptrue p0.d, vl8
				; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
				; VBITS_GE_512-NEXT: asrd z0.d, p0/m, z0.d, #5
				; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
				; VBITS_GE_512-NEXT: ret
				%op1 = load <8 x i64>, <8 x i64>* %a
				%res = sdiv <8 x i64> %op1, shufflevector (<8 x i64> insertelement (<8 x i64> poison, i64 32, i32 0), <8 x i64> poison, <8 x i32> zeroinitializer)
				store <8 x i64> %res, <8 x i64>* %a
				ret void
				}

				; Loads <16 x i64> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; Only checked under the VBITS_GE_1024 prefix, which expects a single vl16
				; ld1d / "asrd ..., #5" / st1d.
				define void @sdiv_v16i64(<16 x i64>* %a) #0 {
				; VBITS_GE_1024-LABEL: sdiv_v16i64:
				; VBITS_GE_1024: // %bb.0:
				; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
				; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
				; VBITS_GE_1024-NEXT: asrd z0.d, p0/m, z0.d, #5
				; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x0]
				; VBITS_GE_1024-NEXT: ret
				%op1 = load <16 x i64>, <16 x i64>* %a
				%res = sdiv <16 x i64> %op1, shufflevector (<16 x i64> insertelement (<16 x i64> poison, i64 32, i32 0), <16 x i64> poison, <16 x i32> zeroinitializer)
				store <16 x i64> %res, <16 x i64>* %a
				ret void
				}

				; Loads <32 x i64> from %a, divides by a splat of 32 (2^5) and stores the result back to %a.
				; Only checked under the VBITS_GE_2048 prefix, which expects a single vl32
				; ld1d / "asrd ..., #5" / st1d.
				define void @sdiv_v32i64(<32 x i64>* %a) #0 {
				; VBITS_GE_2048-LABEL: sdiv_v32i64:
				; VBITS_GE_2048: // %bb.0:
				; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
				; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
				; VBITS_GE_2048-NEXT: asrd z0.d, p0/m, z0.d, #5
				; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x0]
				; VBITS_GE_2048-NEXT: ret
				%op1 = load <32 x i64>, <32 x i64>* %a
				%res = sdiv <32 x i64> %op1, shufflevector (<32 x i64> insertelement (<32 x i64> poison, i64 32, i32 0), <32 x i64> poison, <32 x i32> zeroinitializer)
				store <32 x i64> %res, <32 x i64>* %a
				ret void
				}

				attributes #0 = { "target-features"="+sve" }

llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s | FileCheck %s

				target triple = "aarch64-unknown-linux-gnu"

				; Scalable <vscale x 16 x i8> signed divide by a splat of 16 (2^4).
				; Expects "ptrue p0.b" followed by a single "asrd ..., #4".
				define <vscale x 16 x i8> @sdiv_i8(<vscale x 16 x i8> %a) #0 {
				; CHECK-LABEL: sdiv_i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #4
				; CHECK-NEXT: ret
				%out = sdiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 16, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
				ret <vscale x 16 x i8> %out
				}

				; Scalable <vscale x 16 x i8> signed divide by a splat of -64 (-(2^6)).
				; Expects "asrd ..., #6" for the magnitude, then "subr ..., #0" (0 - x) to apply the
				; negative sign of the divisor.
				define <vscale x 16 x i8> @sdiv_i8_neg(<vscale x 16 x i8> %a) #0 {
				; CHECK-LABEL: sdiv_i8_neg:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #6
				; CHECK-NEXT: subr z0.b, z0.b, #0 // =0x0
				; CHECK-NEXT: ret
				%out = sdiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -64, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
				ret <vscale x 16 x i8> %out
				}

				; Scalable <vscale x 8 x i16> signed divide by a splat of 1024 (2^10).
				; Expects "ptrue p0.h" followed by a single "asrd ..., #10".
				define <vscale x 8 x i16> @sdiv_i16(<vscale x 8 x i16> %a) #0 {
				; CHECK-LABEL: sdiv_i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #10
				; CHECK-NEXT: ret
				%out = sdiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1024, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
				ret <vscale x 8 x i16> %out
				}

				; Scalable <vscale x 8 x i16> signed divide by a splat of -4096 (-(2^12)).
				; Expects "asrd ..., #12" for the magnitude, then "subr ..., #0" (0 - x) to apply the
				; negative sign of the divisor.
				define <vscale x 8 x i16> @sdiv_i16_neg(<vscale x 8 x i16> %a) #0 {
				; CHECK-LABEL: sdiv_i16_neg:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #12
				; CHECK-NEXT: subr z0.h, z0.h, #0 // =0x0
				; CHECK-NEXT: ret
				%out = sdiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -4096, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
				ret <vscale x 8 x i16> %out
				}

				; Scalable <vscale x 4 x i32> signed divide by a splat of 8388608 (2^23).
				; Expects "ptrue p0.s" followed by a single "asrd ..., #23".
				define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a) #0 {
				; CHECK-LABEL: sdiv_i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #23
				; CHECK-NEXT: ret
				%out = sdiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388608, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
				ret <vscale x 4 x i32> %out
				}

				; Scalable <vscale x 4 x i32> signed divide by a splat of -33554432 (-(2^25)).
				; Expects "asrd ..., #25" for the magnitude, then "subr ..., #0" (0 - x) to apply the
				; negative sign of the divisor.
				define <vscale x 4 x i32> @sdiv_i32_neg(<vscale x 4 x i32> %a) #0 {
				; CHECK-LABEL: sdiv_i32_neg:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #25
				; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
				; CHECK-NEXT: ret
				%out = sdiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -33554432, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
				ret <vscale x 4 x i32> %out
				}

				; Scalable <vscale x 2 x i64> signed divide by a splat of 9007199254740992 (2^53).
				; Expects "ptrue p0.d" followed by a single "asrd ..., #53".
				define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a) #0 {
				; CHECK-LABEL: sdiv_i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #53
				; CHECK-NEXT: ret
				%out = sdiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 9007199254740992, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
				ret <vscale x 2 x i64> %out
				}

				; Scalable <vscale x 2 x i64> signed divide by a splat of -36028797018963968 (-(2^55)).
				; Expects "asrd ..., #55" for the magnitude, then "subr ..., #0" (0 - x) to apply the
				; negative sign of the divisor.
				define <vscale x 2 x i64> @sdiv_i64_neg(<vscale x 2 x i64> %a) #0 {
				; CHECK-LABEL: sdiv_i64_neg:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #55
				; CHECK-NEXT: subr z0.d, z0.d, #0 // =0x0
				; CHECK-NEXT: ret
				%out = sdiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -36028797018963968, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
				ret <vscale x 2 x i64> %out
				}

				attributes #0 = { "target-features"="+sve" }

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Generate ASRD instructions for power of 2 signed divides
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 389977

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll

llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Generate ASRD instructions for power of 2 signed divides (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 389977

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll

llvm/test/CodeGen/AArch64/sve-sdiv-pow2.ll

[AArch64][SVE] Generate ASRD instructions for power of 2 signed divides
ClosedPublic