Diff 281761

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 85 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
SMAX_MERGE_OP1,		SMAX_MERGE_OP1,
UMAX_MERGE_OP1,		UMAX_MERGE_OP1,
SHL_MERGE_OP1,		SHL_MERGE_OP1,
SRL_MERGE_OP1,		SRL_MERGE_OP1,
SRA_MERGE_OP1,		SRA_MERGE_OP1,

SETCC_MERGE_ZERO,		SETCC_MERGE_ZERO,

		//Unary Floating Point Operations
		FRINTP_PRED,

// Arithmetic instructions which write flags.		// Arithmetic instructions which write flags.
ADDS,		ADDS,
SUBS,		SUBS,
ADCS,		ADCS,
SBCS,		SBCS,
ANDS,		ANDS,

// Conditional compares. Operands: left,right,falsecc,cc,flags		// Conditional compares. Operands: left,right,falsecc,cc,flags
CCMP,		CCMP,
CCMN,		CCMN,
FCCMP,		FCCMP,

// Floating point comparison		// Floating point comparison
FCMP,		FCMP,
		efriedmaUnsubmitted Not Done Reply Inline Actions This doesn't match the naming convention we're using for these opcodes. See the comment at the beginning of this file: according to those rules, this should be named FRINTP_MERGE_PASSTHRU. But really, probably better to actually implement FRINTP_PRED, without the extra operand. Maybe try looking at https://reviews.llvm.org/D83765 ? efriedma: This doesn't match the naming convention we're using for these opcodes. See the comment at the…

// Scalar extract		// Scalar extract
EXTR,		EXTR,

// Scalar-to-vector duplication		// Scalar-to-vector duplication
DUP,		DUP,
DUPLANE8,		DUPLANE8,
DUPLANE16,		DUPLANE16,
▲ Show 20 Lines • Show All 745 Lines • ▼ Show 20 Lines	private:
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,		SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
unsigned NewOp) const;		unsigned NewOp, bool Merge = false) const;
		paulwalker-armUnsubmitted Not Done Reply Inline Actions Merge is not required, see comment on function definition. paulwalker-arm: Merge is not required, see comment on function definition.
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,		SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
▲ Show 20 Lines • Show All 103 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 950 Lines • ▼ Show 20 Lines	for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);		setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);		setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);		setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);		setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);		setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);		setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);		setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);		setOperationAction(ISD::FSUB, VT, Custom);
		setOperationAction(ISD::FCEIL, VT, Custom);
}		}
}		}

// NOTE: Currently this has to happen after computeRegisterProperties rather		// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.		// than the preferred option of combining it with the addRegisterClass call.
if (useSVEForFixedLengthVectors()) {		if (useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())		for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))		if (useSVEForFixedLengthVectorVT(VT))
▲ Show 20 Lines • Show All 426 Lines • ▼ Show 20 Lines	case AArch64ISD::FIRST_NUMBER:
MAKE_CASE(AArch64ISD::CSINV)		MAKE_CASE(AArch64ISD::CSINV)
MAKE_CASE(AArch64ISD::CSNEG)		MAKE_CASE(AArch64ISD::CSNEG)
MAKE_CASE(AArch64ISD::CSINC)		MAKE_CASE(AArch64ISD::CSINC)
MAKE_CASE(AArch64ISD::THREAD_POINTER)		MAKE_CASE(AArch64ISD::THREAD_POINTER)
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)		MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
MAKE_CASE(AArch64ISD::ADD_PRED)		MAKE_CASE(AArch64ISD::ADD_PRED)
MAKE_CASE(AArch64ISD::SDIV_PRED)		MAKE_CASE(AArch64ISD::SDIV_PRED)
MAKE_CASE(AArch64ISD::UDIV_PRED)		MAKE_CASE(AArch64ISD::UDIV_PRED)
		MAKE_CASE(AArch64ISD::FRINTP_PRED)
MAKE_CASE(AArch64ISD::SMIN_MERGE_OP1)		MAKE_CASE(AArch64ISD::SMIN_MERGE_OP1)
MAKE_CASE(AArch64ISD::UMIN_MERGE_OP1)		MAKE_CASE(AArch64ISD::UMIN_MERGE_OP1)
MAKE_CASE(AArch64ISD::SMAX_MERGE_OP1)		MAKE_CASE(AArch64ISD::SMAX_MERGE_OP1)
MAKE_CASE(AArch64ISD::UMAX_MERGE_OP1)		MAKE_CASE(AArch64ISD::UMAX_MERGE_OP1)
MAKE_CASE(AArch64ISD::SHL_MERGE_OP1)		MAKE_CASE(AArch64ISD::SHL_MERGE_OP1)
MAKE_CASE(AArch64ISD::SRL_MERGE_OP1)		MAKE_CASE(AArch64ISD::SRL_MERGE_OP1)
MAKE_CASE(AArch64ISD::SRA_MERGE_OP1)		MAKE_CASE(AArch64ISD::SRA_MERGE_OP1)
MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)		MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
▲ Show 20 Lines • Show All 2,027 Lines • ▼ Show 20 Lines	if (VT.isVector()) {
return Result;		return Result;
}		}

return SDValue();		return SDValue();
}		}

SDValue AArch64TargetLowering::LowerOperation(SDValue Op,		SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");		LLVM_DEBUG(dbgs() << "Custom lowering: ");
		efriedmaUnsubmitted Not Done Reply Inline Actions We shouldn't be calling getMachineNode in legalization. The way to get an uninitialized value before isel is getUNDEF(). Also, this looks like it's creating an FCEIL with two operands. That's a bad idea; we have a bunch of assertions in getNode() to ensure SelectionDAG nodes are well-formed. Even if those assertions don't catch this issue right now, they might in the future. efriedma: We shouldn't be calling getMachineNode in legalization. The way to get an uninitialized value…
LLVM_DEBUG(Op.dump());		LLVM_DEBUG(Op.dump());

switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
default:		default:
llvm_unreachable("unimplemented operand");		llvm_unreachable("unimplemented operand");
return SDValue();		return SDValue();
case ISD::BITCAST:		case ISD::BITCAST:
return LowerBITCAST(Op, DAG);		return LowerBITCAST(Op, DAG);
▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::EXTRACT_SUBVECTOR:		case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);		return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:		case ISD::INSERT_SUBVECTOR:
return LowerINSERT_SUBVECTOR(Op, DAG);		return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:		case ISD::SDIV:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED);
case ISD::UDIV:		case ISD::UDIV:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED);
		case ISD::FCEIL:
		return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINTP_PRED, true);
		paulwalker-armUnsubmitted Not Done Reply Inline Actions To make the relationship clearer between the original nodes and their predicated counterparts we just add a suffix. So in this case the predicated node should be named FCEIL_PRED. paulwalker-arm: To make the relationship clearer between the original nodes and their predicated counterparts…
case ISD::SMIN:		case ISD::SMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1);
case ISD::UMIN:		case ISD::UMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_MERGE_OP1);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_MERGE_OP1);
case ISD::SMAX:		case ISD::SMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_MERGE_OP1);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_MERGE_OP1);
case ISD::UMAX:		case ISD::UMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_MERGE_OP1);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_MERGE_OP1);
▲ Show 20 Lines • Show All 8,563 Lines • ▼ Show 20 Lines	case Intrinsic::aarch64_sve_ptest_any:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),		return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::ANY_ACTIVE);		AArch64CC::ANY_ACTIVE);
case Intrinsic::aarch64_sve_ptest_first:		case Intrinsic::aarch64_sve_ptest_first:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),		return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::FIRST_ACTIVE);		AArch64CC::FIRST_ACTIVE);
case Intrinsic::aarch64_sve_ptest_last:		case Intrinsic::aarch64_sve_ptest_last:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),		return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::LAST_ACTIVE);		AArch64CC::LAST_ACTIVE);
}		}
		paulwalker-armUnsubmitted Not Done Reply Inline Actions As suggested by Eli you only need to implement the FRINTP_PRED variant, which doesn't set any expectation on the result of inactive lanes. Doing this means the intrinsics can remain untouched. FYI: I'm in the process of converting the last few remaining instances of _MERGE_OP1 nodes (only the shifts and max/min remain) because we have no real need for them as yet and I'm trying to ensure we don't tie the register allocator's hands when code generating normal IR. paulwalker-arm: As suggested by Eli you only need to implement the FRINTP_PRED variant, which doesn't set any…
return SDValue();		return SDValue();
}		}

static SDValue performExtendCombine(SDNode *N,		static SDValue performExtendCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
// If we see something like (zext (sabd (extract_high ...), (DUP ...))) then		// If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
// we can convert that DUP into another extract_high (of a bigger DUP), which		// we can convert that DUP into another extract_high (of a bigger DUP), which
▲ Show 20 Lines • Show All 3,049 Lines • ▼ Show 20 Lines	case MVT::nxv8i16:
break;		break;
}		}

return convertFromScalableVector(DAG, VT, Val);		return convertFromScalableVector(DAG, VT, Val);
}		}

SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,		SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SelectionDAG &DAG,		SelectionDAG &DAG,
unsigned NewOp) const {		unsigned NewOp,
		bool Merge) const {
EVT VT = Op.getValueType();		EVT VT = Op.getValueType();
SDLoc DL(Op);		SDLoc DL(Op);
auto Pg = getPredicateForVector(DAG, DL, VT);		auto Pg = getPredicateForVector(DAG, DL, VT);

if (useSVEForFixedLengthVectorVT(VT)) {		if (useSVEForFixedLengthVectorVT(VT)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);		EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

// Create list of operands by converting existing ones to scalable types.		// Create list of operands by converting existing ones to scalable types.
Show All 11 Lines	if (useSVEForFixedLengthVectorVT(VT)) {

auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);		auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
return convertFromScalableVector(DAG, VT, ScalableRes);		return convertFromScalableVector(DAG, VT, ScalableRes);
}		}

assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");		assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");

SmallVector<SDValue, 4> Operands = {Pg};		SmallVector<SDValue, 4> Operands = {Pg};
		if(Merge)
		paulwalker-armUnsubmitted Not Done Reply Inline Actions I don't understand why you need this change. By definition, the _PRED nodes should take the form: ISDNODE Op1, Op2...OpN -> ISDNODE_PRED Pg, Op1, Op2...OpN I would expect this function to do what's required without any changes. I suspect any issues are likely down to mistakes within the isel patterns. paulwalker-arm: I don't understand why you need this change. By definition, the _PRED nodes should take the…
		Operands.insert(Operands.begin(), DAG.getUNDEF(VT));
for (const SDValue &V : Op->op_values()) {		for (const SDValue &V : Op->op_values()) {
assert((isa<CondCodeSDNode>(V) \|\| V.getValueType().isScalableVector()) &&		assert((isa<CondCodeSDNode>(V) \|\| V.getValueType().isScalableVector()) &&
"Only scalable vectors are supported!");		"Only scalable vectors are supported!");
Operands.push_back(V);		Operands.push_back(V);
}		}

return DAG.getNode(NewOp, DL, VT, Operands);		return DAG.getNode(NewOp, DL, VT, Operands);
}		}

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show First 20 Lines • Show All 156 Lines • ▼ Show 20 Lines
def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;		def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
def AArch64uminv_p : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;		def AArch64uminv_p : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;
def AArch64orv_p : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;		def AArch64orv_p : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;
def AArch64eorv_p : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;		def AArch64eorv_p : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;
def AArch64andv_p : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;		def AArch64andv_p : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;
def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;		def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;
def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;		def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;

		def SDT_AArch64UnaryOp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
		paulwalker-armUnsubmitted Not Done Reply Inline Actions This is incorrect because the unary _PRED nodes should have 2 operands. The predicate followed by the data operand. See SDT_AArch64Arith for inspiration, where you just need to drop the stuff related to Op3. paulwalker-arm: This is incorrect because the unary _PRED nodes should have 2 operands. The predicate followed…
		def AArch64frintp_p : SDNode<"AArch64ISD::FRINTP_PRED", SDT_AArch64UnaryOp>;

def SDT_AArch64Arith : SDTypeProfile<1, 3, [		def SDT_AArch64Arith : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>		SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>
]>;		]>;

def SDT_AArch64FMA : SDTypeProfile<1, 4, [		def SDT_AArch64FMA : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>		SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>
]>;		]>;

// Predicated operations with the result of inactive lanes being unspecified.		// Predicated operations with the result of inactive lanes being unspecified.
def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;		def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;
def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;		def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;		def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;
def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>;		def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>;
def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>;		def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>;
def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;		def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;
def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;		def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;		def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;

// Merging op1 into the inactive lanes.		// Merging op1 into the inactive lanes.
def AArch64smin_m1 : SDNode<"AArch64ISD::SMIN_MERGE_OP1", SDT_AArch64Arith>;		def AArch64smin_m1 : SDNode<"AArch64ISD::SMIN_MERGE_OP1", SDT_AArch64Arith>;
		paulwalker-armUnsubmitted Not Done Reply Inline Actions I've tried to use the AArch64 names here, so AArch64frintp_mt, as it makes it easy to spot mismatches with the instruction definitions. paulwalker-arm: I've tried to use the AArch64 names here, so AArch64frintp_mt, as it makes it easy to spot…
def AArch64umin_m1 : SDNode<"AArch64ISD::UMIN_MERGE_OP1", SDT_AArch64Arith>;		def AArch64umin_m1 : SDNode<"AArch64ISD::UMIN_MERGE_OP1", SDT_AArch64Arith>;
def AArch64smax_m1 : SDNode<"AArch64ISD::SMAX_MERGE_OP1", SDT_AArch64Arith>;		def AArch64smax_m1 : SDNode<"AArch64ISD::SMAX_MERGE_OP1", SDT_AArch64Arith>;
def AArch64umax_m1 : SDNode<"AArch64ISD::UMAX_MERGE_OP1", SDT_AArch64Arith>;		def AArch64umax_m1 : SDNode<"AArch64ISD::UMAX_MERGE_OP1", SDT_AArch64Arith>;
def AArch64lsl_m1 : SDNode<"AArch64ISD::SHL_MERGE_OP1", SDT_AArch64Arith>;		def AArch64lsl_m1 : SDNode<"AArch64ISD::SHL_MERGE_OP1", SDT_AArch64Arith>;
def AArch64lsr_m1 : SDNode<"AArch64ISD::SRL_MERGE_OP1", SDT_AArch64Arith>;		def AArch64lsr_m1 : SDNode<"AArch64ISD::SRL_MERGE_OP1", SDT_AArch64Arith>;
def AArch64asr_m1 : SDNode<"AArch64ISD::SRA_MERGE_OP1", SDT_AArch64Arith>;		def AArch64asr_m1 : SDNode<"AArch64ISD::SRA_MERGE_OP1", SDT_AArch64Arith>;

def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;		def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
▲ Show 20 Lines • Show All 1,181 Lines • ▼ Show 20 Lines	multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, int scale, ComplexPattern AddrCP> {
defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;		defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;		defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;
defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;		defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;		defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;
defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;		defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;		defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;

defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>;		defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>;
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp>;		defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp, AArch64frintp_p>;
defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>;		defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>;
defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>;		defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>;
defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>;		defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>;
defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", int_aarch64_sve_frintx>;		defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", int_aarch64_sve_frintx>;
defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", int_aarch64_sve_frinti>;		defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", int_aarch64_sve_frinti>;
defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;		defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;		defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;

▲ Show 20 Lines • Show All 1,238 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/SVEInstrFormats.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,261 Lines • ▼ Show 20 Lines	multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
RegisterOperand o_zprtype,		RegisterOperand o_zprtype,
SDPatternOperator op, ValueType vt1,		SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {		ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;		def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;

def : SVE_3_Op_Pat<vt1, op, vt1, vt2, vt3, !cast<Instruction>(NAME)>;		def : SVE_3_Op_Pat<vt1, op, vt1, vt2, vt3, !cast<Instruction>(NAME)>;
}		}

multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {		multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op,
		SDPatternOperator op2 = null_frag> {
		paulwalker-armUnsubmitted Not Done Reply Inline Actions Happy as is, but you could enhance readability if the operators are named op_merge and op_pred. paulwalker-arm: Happy as is, but you could enhance readability if the operators are named op_merge and op_pred.
def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;		def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;		def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;		def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;

def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;		def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;		def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;		def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;

		def : Pat<(nxv8f16 (op2 nxv8f16:$Op1)),
		(!cast<Instruction>(NAME # _H) (IMPLICIT_DEF), (PTRUE_H 31), $Op1)>;
		paulwalker-armUnsubmitted Not Done Reply Inline Actions There should be no need to explicitly create the PTRUE here because the _PRED nodes already provide the predicate (which will be Op1 when LowerToPredicatedOp is restored) that should be used directly. To be honest I'm not entirely sure how this even works since when creating FRINTP_PRED you currently set Op1 to DAG.getUNDEF(). There should also be patterns for the other legal floating point MVTs (i.e. nxv2f16, nxv4f16 and nxv2f32). paulwalker-arm: There should be no need to explicitly create the PTRUE here because the _PRED nodes already…
		def : Pat<(nxv4f32 (op2 nxv4f32:$Op1)),
		(!cast<Instruction>(NAME # _S) (IMPLICIT_DEF), (PTRUE_S 31), $Op1)>;
		def : Pat<(nxv2f64 (op2 nxv2f64:$Op1)),
		(!cast<Instruction>(NAME # _D) (IMPLICIT_DEF), (PTRUE_D 31), $Op1)>;
}		}

multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {		multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {
def _H : sve_fp_2op_p_zd<0b0011010, asm, ZPR16, ZPR16, ElementSizeH>;		def _H : sve_fp_2op_p_zd<0b0011010, asm, ZPR16, ZPR16, ElementSizeH>;
def _S : sve_fp_2op_p_zd<0b0011100, asm, ZPR32, ZPR32, ElementSizeS>;		def _S : sve_fp_2op_p_zd<0b0011100, asm, ZPR32, ZPR32, ElementSizeS>;
def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>;		def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>;

def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;		def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
▲ Show 20 Lines • Show All 5,563 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-fp.ll

	Show First 20 Lines • Show All 402 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]			; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
	; CHECK-NEXT: st1w { z0.s }, p0, [x1]			; CHECK-NEXT: st1w { z0.s }, p0, [x1]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%A = load <vscale x 4 x float>, <vscale x 4 x float>* %P1, align 16			%A = load <vscale x 4 x float>, <vscale x 4 x float>* %P1, align 16
	store <vscale x 4 x float> %A, <vscale x 4 x float>* %P2, align 16			store <vscale x 4 x float> %A, <vscale x 4 x float>* %P2, align 16
	ret void			ret void
	}			}

				; FCEIL

				define <vscale x 8 x half> @frintp_nxv8f16(<vscale x 8 x half> %a) {
				paulwalker-armUnsubmitted Not Done Reply Inline Actions You'll need tests for the other legal floating point MVTs (i.e. nxv2f16, nxv4f16 and nxv2f32). paulwalker-arm: You'll need tests for the other legal floating point MVTs (i.e. nxv2f16, nxv4f16 and nxv2f32).
				; CHECK-LABEL: frintp_nxv8f16:
				; CHECK: ptrue p0.h
				; CHECK-NEXT: frintp z0.h, p0/m, z0.h
				; CHECK-NEXT: ret
				%res = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %a)
				ret <vscale x 8 x half> %res
				}

				define <vscale x 4 x float> @frintp_nxv4f32(<vscale x 4 x float> %a) {
				; CHECK-LABEL: frintp_nxv4f32:
				; CHECK: ptrue p0.s
				; CHECK-NEXT: frintp z0.s, p0/m, z0.s
				; CHECK-NEXT: ret
				%res = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %a)
				ret <vscale x 4 x float> %res
				}

				define <vscale x 2 x double> @frintp_nxv2f64(<vscale x 2 x double> %a) {
				; CHECK-LABEL: frintp_nxv2f64:
				; CHECK: ptrue p0.d
				; CHECK-NEXT: frintp z0.d, p0/m, z0.d
				; CHECK-NEXT: ret
				%res = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %a)
				ret <vscale x 2 x double> %res
				}

	declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)			declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
	declare <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> , <vscale x 4 x float>)			declare <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> , <vscale x 4 x float>)
	declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)			declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

	declare <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)			declare <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
	declare <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)			declare <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
	declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)			declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

	declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)			declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
	declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)			declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
	declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)			declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
	declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)			declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
	declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)			declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
	declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)			declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)

				declare <vscale x 8 x half> @llvm.ceil.nxv8f16( <vscale x 8 x half>)
				declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)
				declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)

	; Function Attrs: nounwind readnone			; Function Attrs: nounwind readnone
	declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2			declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add lowering for llvm fceil
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 281761

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-fp.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add lowering for llvm fceil
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 281761

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-fp.ll

[AArch64][SVE] Add lowering for llvm fceil
ClosedPublic