Diff 287700

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 92 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
UMAX_PRED,		UMAX_PRED,
UMIN_PRED,		UMIN_PRED,

// Predicated instructions with the result of inactive lanes provided by the		// Predicated instructions with the result of inactive lanes provided by the
// last operand.		// last operand.
FNEG_MERGE_PASSTHRU,		FNEG_MERGE_PASSTHRU,
SIGN_EXTEND_INREG_MERGE_PASSTHRU,		SIGN_EXTEND_INREG_MERGE_PASSTHRU,
ZERO_EXTEND_INREG_MERGE_PASSTHRU,		ZERO_EXTEND_INREG_MERGE_PASSTHRU,
		FCEIL_MERGE_PASSTHRU,

SETCC_MERGE_ZERO,		SETCC_MERGE_ZERO,

// Arithmetic instructions which write flags.		// Arithmetic instructions which write flags.
ADDS,		ADDS,
SUBS,		SUBS,
ADCS,		ADCS,
SBCS,		SBCS,
ANDS,		ANDS,

// Conditional compares. Operands: left,right,falsecc,cc,flags		// Conditional compares. Operands: left,right,falsecc,cc,flags
CCMP,		CCMP,
CCMN,		CCMN,
FCCMP,		FCCMP,

// Floating point comparison		// Floating point comparison
FCMP,		FCMP,
		efriedmaUnsubmitted Not Done Reply Inline Actions This doesn't match the naming convention we're using for these opcodes. See the comment at the beginning of this file: according to those rules, this should be named FRINTP_MERGE_PASSTHRU. But really, probably better to actually implement FRINTP_PRED, without the extra operand. Maybe try looking at https://reviews.llvm.org/D83765 ? efriedma: This doesn't match the naming convention we're using for these opcodes. See the comment at the…

// Scalar extract		// Scalar extract
EXTR,		EXTR,

// Scalar-to-vector duplication		// Scalar-to-vector duplication
DUP,		DUP,
DUPLANE8,		DUPLANE8,
DUPLANE16,		DUPLANE16,
▲ Show 20 Lines • Show All 738 Lines • ▼ Show 20 Lines	private:
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,		SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
bool OverrideNEON = false) const;		bool OverrideNEON = false) const;
SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
		paulwalker-armUnsubmitted Not Done Reply Inline Actions Merge is not required, see comment on function definition. paulwalker-arm: Merge is not required, see comment on function definition.
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
▲ Show 20 Lines • Show All 114 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 132 Lines • ▼ Show 20 Lines
static bool isMergePassthruOpcode(unsigned Opc) {		static bool isMergePassthruOpcode(unsigned Opc) {
switch (Opc) {		switch (Opc) {
default:		default:
return false;		return false;
case AArch64ISD::DUP_MERGE_PASSTHRU:		case AArch64ISD::DUP_MERGE_PASSTHRU:
case AArch64ISD::FNEG_MERGE_PASSTHRU:		case AArch64ISD::FNEG_MERGE_PASSTHRU:
case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:		case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:		case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
		case AArch64ISD::FCEIL_MERGE_PASSTHRU:
return true;		return true;
}		}
}		}

AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,		AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)		const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {		: TargetLowering(TM), Subtarget(&STI) {
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so		// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
▲ Show 20 Lines • Show All 818 Lines • ▼ Show 20 Lines	for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);		setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);		setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);		setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);		setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);		setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);		setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);		setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);		setOperationAction(ISD::FSUB, VT, Custom);
		setOperationAction(ISD::FCEIL, VT, Custom);
}		}
}		}

// NOTE: Currently this has to happen after computeRegisterProperties rather		// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.		// than the preferred option of combining it with the addRegisterClass call.
if (useSVEForFixedLengthVectors()) {		if (useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())		for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))		if (useSVEForFixedLengthVectorVT(VT))
▲ Show 20 Lines • Show All 488 Lines • ▼ Show 20 Lines	case AArch64ISD::FIRST_NUMBER:
MAKE_CASE(AArch64ISD::SRL_PRED)		MAKE_CASE(AArch64ISD::SRL_PRED)
MAKE_CASE(AArch64ISD::SUB_PRED)		MAKE_CASE(AArch64ISD::SUB_PRED)
MAKE_CASE(AArch64ISD::UDIV_PRED)		MAKE_CASE(AArch64ISD::UDIV_PRED)
MAKE_CASE(AArch64ISD::UMAX_PRED)		MAKE_CASE(AArch64ISD::UMAX_PRED)
MAKE_CASE(AArch64ISD::UMIN_PRED)		MAKE_CASE(AArch64ISD::UMIN_PRED)
MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)		MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)		MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)		MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
		MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)		MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ADC)		MAKE_CASE(AArch64ISD::ADC)
MAKE_CASE(AArch64ISD::SBC)		MAKE_CASE(AArch64ISD::SBC)
MAKE_CASE(AArch64ISD::ADDS)		MAKE_CASE(AArch64ISD::ADDS)
MAKE_CASE(AArch64ISD::SUBS)		MAKE_CASE(AArch64ISD::SUBS)
MAKE_CASE(AArch64ISD::ADCS)		MAKE_CASE(AArch64ISD::ADCS)
MAKE_CASE(AArch64ISD::SBCS)		MAKE_CASE(AArch64ISD::SBCS)
MAKE_CASE(AArch64ISD::ANDS)		MAKE_CASE(AArch64ISD::ANDS)
▲ Show 20 Lines • Show All 1,845 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_dupq_lane:		case Intrinsic::aarch64_sve_dupq_lane:
return LowerDUPQLane(Op, DAG);		return LowerDUPQLane(Op, DAG);
case Intrinsic::aarch64_sve_convert_from_svbool:		case Intrinsic::aarch64_sve_convert_from_svbool:
return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),		return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
Op.getOperand(1));		Op.getOperand(1));
case Intrinsic::aarch64_sve_fneg:		case Intrinsic::aarch64_sve_fneg:
return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),		return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));		Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
		case Intrinsic::aarch64_sve_frintp:
		return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
		Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_convert_to_svbool: {		case Intrinsic::aarch64_sve_convert_to_svbool: {
EVT OutVT = Op.getValueType();		EVT OutVT = Op.getValueType();
EVT InVT = Op.getOperand(1).getValueType();		EVT InVT = Op.getOperand(1).getValueType();
// Return the operand if the cast isn't changing type,		// Return the operand if the cast isn't changing type,
// i.e. <n x 16 x i1> -> <n x 16 x i1>		// i.e. <n x 16 x i1> -> <n x 16 x i1>
if (InVT == OutVT)		if (InVT == OutVT)
return Op.getOperand(1);		return Op.getOperand(1);
// Otherwise, zero the newly introduced lanes.		// Otherwise, zero the newly introduced lanes.
▲ Show 20 Lines • Show All 211 Lines • ▼ Show 20 Lines	if (VT.isVector()) {
return Result;		return Result;
}		}

return SDValue();		return SDValue();
}		}

SDValue AArch64TargetLowering::LowerOperation(SDValue Op,		SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");		LLVM_DEBUG(dbgs() << "Custom lowering: ");
		efriedmaUnsubmitted Not Done Reply Inline Actions We shouldn't be calling getMachineNode in legalization. The way to get an uninitialized value before isel is getUNDEF(). Also, this looks like it's creating an FCEIL with two operands. That's a bad idea; we have a bunch of assertions in getNode() to ensure SelectionDAG nodes are well-formed. Even if those assertions don't catch this issue right now, they might in the future. efriedma: We shouldn't be calling getMachineNode in legalization. The way to get an uninitialized value…
LLVM_DEBUG(Op.dump());		LLVM_DEBUG(Op.dump());

switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
default:		default:
llvm_unreachable("unimplemented operand");		llvm_unreachable("unimplemented operand");
return SDValue();		return SDValue();
case ISD::BITCAST:		case ISD::BITCAST:
return LowerBITCAST(Op, DAG);		return LowerBITCAST(Op, DAG);
▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::FMA:		case ISD::FMA:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
case ISD::FDIV:		case ISD::FDIV:
if (Op.getValueType() == MVT::f128)		if (Op.getValueType() == MVT::f128)
return LowerF128Call(Op, DAG, RTLIB::DIV_F128);		return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
case ISD::FNEG:		case ISD::FNEG:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
		case ISD::FCEIL:
		return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
case ISD::FP_ROUND:		case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:		case ISD::STRICT_FP_ROUND:
return LowerFP_ROUND(Op, DAG);		return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND:		case ISD::FP_EXTEND:
return LowerFP_EXTEND(Op, DAG);		return LowerFP_EXTEND(Op, DAG);
case ISD::FRAMEADDR:		case ISD::FRAMEADDR:
return LowerFRAMEADDR(Op, DAG);		return LowerFRAMEADDR(Op, DAG);
case ISD::SPONENTRY:		case ISD::SPONENTRY:
Show All 15 Lines	SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::EXTRACT_SUBVECTOR:		case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);		return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:		case ISD::INSERT_SUBVECTOR:
return LowerINSERT_SUBVECTOR(Op, DAG);		return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:		case ISD::SDIV:
case ISD::UDIV:		case ISD::UDIV:
return LowerDIV(Op, DAG);		return LowerDIV(Op, DAG);
case ISD::SMIN:		case ISD::SMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
		paulwalker-armUnsubmitted Not Done Reply Inline Actions To make the relationship clearer between the original nodes and their predicated counterparts we just add a suffix. So in this case the predicated node should be named FCEIL_PRED. paulwalker-arm: To make the relationship clearer between the original nodes and their predicated counterparts…
/*OverrideNEON=*/true);		/*OverrideNEON=*/true);
case ISD::UMIN:		case ISD::UMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,		return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
/*OverrideNEON=*/true);		/*OverrideNEON=*/true);
case ISD::SMAX:		case ISD::SMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
/*OverrideNEON=*/true);		/*OverrideNEON=*/true);
case ISD::UMAX:		case ISD::UMAX:
▲ Show 20 Lines • Show All 8,654 Lines • ▼ Show 20 Lines	case Intrinsic::aarch64_sve_ptest_any:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),		return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::ANY_ACTIVE);		AArch64CC::ANY_ACTIVE);
case Intrinsic::aarch64_sve_ptest_first:		case Intrinsic::aarch64_sve_ptest_first:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),		return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::FIRST_ACTIVE);		AArch64CC::FIRST_ACTIVE);
case Intrinsic::aarch64_sve_ptest_last:		case Intrinsic::aarch64_sve_ptest_last:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),		return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::LAST_ACTIVE);		AArch64CC::LAST_ACTIVE);
}		}
		paulwalker-armUnsubmitted Not Done Reply Inline Actions As suggested by Eli you only need to implement the FRINTP_PRED variant, which doesn't set any expectation on the result of inactive lanes. Doing this means the intrinsics can remain untouched. FYI: I'm in the process of converting the last few remaining instances of _MERGE_OP1 nodes (only the shifts and max/min remain) because we have no real need for them as yet and I'm trying to ensure we don't tie the register allocator's hands when code generating normal IR. paulwalker-arm: As suggested by Eli you only need to implement the FRINTP_PRED variant, which doesn't set any…
return SDValue();		return SDValue();
}		}

static SDValue performExtendCombine(SDNode *N,		static SDValue performExtendCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
// If we see something like (zext (sabd (extract_high ...), (DUP ...))) then		// If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
// we can convert that DUP into another extract_high (of a bigger DUP), which		// we can convert that DUP into another extract_high (of a bigger DUP), which
▲ Show 20 Lines • Show All 3,182 Lines • ▼ Show 20 Lines	if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {

auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);		auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
return convertFromScalableVector(DAG, VT, ScalableRes);		return convertFromScalableVector(DAG, VT, ScalableRes);
}		}

assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");		assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");

SmallVector<SDValue, 4> Operands = {Pg};		SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {		for (const SDValue &V : Op->op_values()) {
		paulwalker-armUnsubmitted Not Done Reply Inline Actions I don't understand why you need this change. By definition the _PRED nodes should take the form: ISDNODE Op1, Op2...OpN -> ISDNODE_PRED Pg, Op1, Op2...OpN I would expect this function to do what's required without any changes. I suspect any issues are likely down to mistakes within the isel patterns. paulwalker-arm: I don't understand why you need this change. By definition the _PRED nodes should take the…
assert((isa<CondCodeSDNode>(V) || V.getValueType().isScalableVector()) &&		assert((isa<CondCodeSDNode>(V) || V.getValueType().isScalableVector()) &&
"Only scalable vectors are supported!");		"Only scalable vectors are supported!");
Operands.push_back(V);		Operands.push_back(V);
}		}

if (isMergePassthruOpcode(NewOp))		if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(VT));		Operands.push_back(DAG.getUNDEF(VT));

▲ Show 20 Lines • Show All 61 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show First 20 Lines • Show All 156 Lines • ▼ Show 20 Lines
def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;		def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
def AArch64uminv_p : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;		def AArch64uminv_p : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;
def AArch64orv_p : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;		def AArch64orv_p : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;
def AArch64eorv_p : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;		def AArch64eorv_p : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;
def AArch64andv_p : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;		def AArch64andv_p : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;
def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;		def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;
def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;		def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;

def SDT_AArch64Arith : SDTypeProfile<1, 3, [		def SDT_AArch64Arith : SDTypeProfile<1, 3, [
		paulwalker-armUnsubmitted Not Done Reply Inline Actions This is incorrect because the unary _PRED nodes should have 2 operands. The predicate followed by the data operand. See SDT_AArch64Arith for inspiration, where you just need to drop the stuff related to Op3. paulwalker-arm: This is incorrect because the unary _PRED nodes should have 2 operands. The predicate followed…
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>		SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>
]>;		]>;

def SDT_AArch64FMA : SDTypeProfile<1, 4, [		def SDT_AArch64FMA : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>		SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>
]>;		]>;
Show All 23 Lines	def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4>		SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4>
]>;		]>;

// Predicated operations with the result of inactive lanes provided by the last operand.		// Predicated operations with the result of inactive lanes provided by the last operand.
def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>;		def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>;
def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;		def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;		def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
		def AArch64fceil_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>;
		paulwalker-armUnsubmitted Not Done Reply Inline Actions I've tried to use the AArch64 names here, so AArch64frintp_mt, as it makes it easy to spot mismatches with the instruction definitions. paulwalker-arm: I've tried to use the AArch64 names here, so AArch64frintp_mt, as it makes it easy to spot…

def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;		def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;		def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;		def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
def AArch64fadda_p : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;		def AArch64fadda_p : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;

def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;		def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;		def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;
▲ Show 20 Lines • Show All 1,198 Lines • ▼ Show 20 Lines	multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, int scale, ComplexPattern AddrCP> {
defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;		defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;		defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;
defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;		defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;		defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;
defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;		defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;		defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;

defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>;		defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>;
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp>;		defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp, AArch64fceil_mt>;
defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>;		defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>;
defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>;		defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>;
defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>;		defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>;
defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", int_aarch64_sve_frintx>;		defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", int_aarch64_sve_frintx>;
defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", int_aarch64_sve_frinti>;		defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", int_aarch64_sve_frinti>;
defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;		defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;		defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;

▲ Show 20 Lines • Show All 1,238 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/SVEInstrFormats.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,280 Lines • ▼ Show 20 Lines	multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
RegisterOperand o_zprtype,		RegisterOperand o_zprtype,
SDPatternOperator op, ValueType vt1,		SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {		ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;		def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;

def : SVE_3_Op_Pat<vt1, op, vt1, vt2, vt3, !cast<Instruction>(NAME)>;		def : SVE_3_Op_Pat<vt1, op, vt1, vt2, vt3, !cast<Instruction>(NAME)>;
}		}

multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {		multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op_merge,
		SDPatternOperator op_pt = null_frag> {
def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;		def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
		paulwalker-armUnsubmitted Not Done Reply Inline Actions Happy as is, but you could enhance readability if the operators are named op_merge and op_pred. paulwalker-arm: Happy as is, but you could enhance readability if the operators are named op_merge and op_pred.
def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;		def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;		def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;

def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;		def : SVE_3_Op_Pat<nxv8f16, op_merge, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;		def : SVE_3_Op_Pat<nxv4f32, op_merge, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;		def : SVE_3_Op_Pat<nxv2f64, op_merge, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;

		paulwalker-armUnsubmitted Not Done Reply Inline Actions There should be no need to explicitly create the PTRUE here because the _PRED nodes already provide the predicate (which will be Op1 when LowerToPredicatedOp is restored) that should be used directly. To be honest I'm not entirely sure how this even works since when creating FRINTP_PRED you currently set Op1 to DAG.getUNDEF(). There should also be patterns for the other legal floating point MVTs (i.e. nxv2f16, nxv4f16 and nxv2f32). paulwalker-arm: There should be no need to explicitly create the PTRUE here because the _PRED nodes already…
		def : SVE_1_Op_Passthru_Pat<nxv8f16, op_pt, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
		def : SVE_1_Op_Passthru_Pat<nxv4f16, op_pt, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
		def : SVE_1_Op_Passthru_Pat<nxv2f16, op_pt, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
		def : SVE_1_Op_Passthru_Pat<nxv4f32, op_pt, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
		def : SVE_1_Op_Passthru_Pat<nxv2f32, op_pt, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
		def : SVE_1_Op_Passthru_Pat<nxv2f64, op_pt, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}		}

multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {		multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {
def _H : sve_fp_2op_p_zd<0b0011010, asm, ZPR16, ZPR16, ElementSizeH>;		def _H : sve_fp_2op_p_zd<0b0011010, asm, ZPR16, ZPR16, ElementSizeH>;
def _S : sve_fp_2op_p_zd<0b0011100, asm, ZPR32, ZPR32, ElementSizeS>;		def _S : sve_fp_2op_p_zd<0b0011100, asm, ZPR32, ZPR32, ElementSizeS>;
def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>;		def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>;

def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;		def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
▲ Show 20 Lines • Show All 5,621 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-fp.ll

	Show First 20 Lines • Show All 474 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]			; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
	; CHECK-NEXT: st1w { z0.s }, p0, [x1]			; CHECK-NEXT: st1w { z0.s }, p0, [x1]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%A = load <vscale x 4 x float>, <vscale x 4 x float>* %P1, align 16			%A = load <vscale x 4 x float>, <vscale x 4 x float>* %P1, align 16
	store <vscale x 4 x float> %A, <vscale x 4 x float>* %P2, align 16			store <vscale x 4 x float> %A, <vscale x 4 x float>* %P2, align 16
	ret void			ret void
	}			}

				; FCEIL

				define <vscale x 8 x half> @frintp_nxv8f16(<vscale x 8 x half> %a) {
				paulwalker-arm (inline comment; unsubmitted, not done): You'll need tests for the other legal floating point MVTs (i.e. nxv2f16, nxv4f16 and nxv2f32).
				; CHECK-LABEL: frintp_nxv8f16:
				; CHECK: ptrue p0.h
				; CHECK-NEXT: frintp z0.h, p0/m, z0.h
				; CHECK-NEXT: ret
				%res = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %a)
				ret <vscale x 8 x half> %res
				}

				; llvm.ceil on <vscale x 4 x half>. The CHECK lines expect an all-.s ptrue
				; feeding a merging-predicated (p0/m) frintp on .h elements, then ret.
				; NOTE(review): the predicate element size (.s) differs from the data element
				; size (.h), presumably because nxv4f16 is held unpacked in 32-bit
				; containers - confirm.
				define <vscale x 4 x half> @frintp_nxv4f16(<vscale x 4 x half> %a) {
				; CHECK-LABEL: frintp_nxv4f16:
				; CHECK: ptrue p0.s
				; CHECK-NEXT: frintp z0.h, p0/m, z0.h
				; CHECK-NEXT: ret
				%res = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %a)
				ret <vscale x 4 x half> %res
				}

				; llvm.ceil on <vscale x 2 x half>. The CHECK lines expect an all-.d ptrue
				; feeding a merging-predicated (p0/m) frintp on .h elements, then ret.
				; NOTE(review): the predicate element size (.d) differs from the data element
				; size (.h), presumably because nxv2f16 is held unpacked in 64-bit
				; containers - confirm.
				define <vscale x 2 x half> @frintp_nxv2f16(<vscale x 2 x half> %a) {
				; CHECK-LABEL: frintp_nxv2f16:
				; CHECK: ptrue p0.d
				; CHECK-NEXT: frintp z0.h, p0/m, z0.h
				; CHECK-NEXT: ret
				%res = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %a)
				ret <vscale x 2 x half> %res
				}

				; llvm.ceil on <vscale x 4 x float>. The CHECK lines expect an all-.s ptrue
				; feeding a merging-predicated (p0/m) frintp on .s elements, then ret.
				define <vscale x 4 x float> @frintp_nxv4f32(<vscale x 4 x float> %a) {
				; CHECK-LABEL: frintp_nxv4f32:
				; CHECK: ptrue p0.s
				; CHECK-NEXT: frintp z0.s, p0/m, z0.s
				; CHECK-NEXT: ret
				%res = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %a)
				ret <vscale x 4 x float> %res
				}

				; llvm.ceil on <vscale x 2 x float>. The CHECK lines expect an all-.d ptrue
				; feeding a merging-predicated (p0/m) frintp on .s elements, then ret.
				; NOTE(review): the predicate element size (.d) differs from the data element
				; size (.s), presumably because nxv2f32 is held unpacked in 64-bit
				; containers - confirm.
				define <vscale x 2 x float> @frintp_nxv2f32(<vscale x 2 x float> %a) {
				; CHECK-LABEL: frintp_nxv2f32:
				; CHECK: ptrue p0.d
				; CHECK-NEXT: frintp z0.s, p0/m, z0.s
				; CHECK-NEXT: ret
				%res = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> %a)
				ret <vscale x 2 x float> %res
				}

				; llvm.ceil on <vscale x 2 x double>. The CHECK lines expect an all-.d ptrue
				; feeding a merging-predicated (p0/m) frintp on .d elements, then ret.
				define <vscale x 2 x double> @frintp_nxv2f64(<vscale x 2 x double> %a) {
				; CHECK-LABEL: frintp_nxv2f64:
				; CHECK: ptrue p0.d
				; CHECK-NEXT: frintp z0.d, p0/m, z0.d
				; CHECK-NEXT: ret
				%res = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %a)
				ret <vscale x 2 x double> %res
				}

	declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)			declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
	declare <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> , <vscale x 4 x float>)			declare <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> , <vscale x 4 x float>)
	declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)			declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

	declare <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)			declare <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
	declare <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)			declare <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
	declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)			declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

	declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)			declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
	declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)			declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
	declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)			declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
	declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)			declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
	declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)			declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
	declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)			declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)

				; Declarations of the llvm.ceil intrinsic overloads called by the frintp_*
				; tests in this file, one per scalable floating-point vector type under test.
				declare <vscale x 8 x half> @llvm.ceil.nxv8f16( <vscale x 8 x half>)
				declare <vscale x 4 x half> @llvm.ceil.nxv4f16( <vscale x 4 x half>)
				declare <vscale x 2 x half> @llvm.ceil.nxv2f16( <vscale x 2 x half>)
				declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)
				declare <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float>)
				declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)

	; Function Attrs: nounwind readnone			; Function Attrs: nounwind readnone
	declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2			declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add lowering for llvm fceil
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 287700

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-fp.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add lowering for llvm fceil
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 287700

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-fp.ll

[AArch64][SVE] Add lowering for llvm fceil
ClosedPublic