Diff 280558

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 97 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
SBCS,		SBCS,
ANDS,		ANDS,

// Conditional compares. Operands: left,right,falsecc,cc,flags		// Conditional compares. Operands: left,right,falsecc,cc,flags
CCMP,		CCMP,
CCMN,		CCMN,
FCCMP,		FCCMP,

		// Unary Floating Point Operation
		FRINTP_PRED,
		efriedmaUnsubmitted Not Done Reply Inline Actions This doesn't match the naming convention we're using for these opcodes. See the comment at the beginning of this file: according to those rules, this should be named FRINTP_MERGE_PASSTHRU. But really, probably better to actually implement FRINTP_PRED, without the extra operand. Maybe try looking at https://reviews.llvm.org/D83765 ? efriedma: This doesn't match the naming convention we're using for these opcodes. See the comment at the…

// Floating point comparison		// Floating point comparison
FCMP,		FCMP,

// Scalar extract		// Scalar extract
EXTR,		EXTR,

// Scalar-to-vector duplication		// Scalar-to-vector duplication
DUP,		DUP,
▲ Show 20 Lines • Show All 747 Lines • ▼ Show 20 Lines	private:
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,		SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
unsigned NewOp) const;		unsigned NewOp, bool Merging = false) const;
		paulwalker-armUnsubmitted Not Done Reply Inline Actions Merge is not required, see comment on function definition. paulwalker-arm: Merge is not required, see comment on function definition.
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,		SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
▲ Show 20 Lines • Show All 103 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 948 Lines • ▼ Show 20 Lines	for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);		setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);		setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);		setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);		setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);		setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);		setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);		setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);		setOperationAction(ISD::FSUB, VT, Custom);
		setOperationAction(ISD::FCEIL, VT, Custom);
}		}
}		}

// NOTE: Currently this has to happen after computeRegisterProperties rather		// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.		// than the preferred option of combining it with the addRegisterClass call.
if (useSVEForFixedLengthVectors()) {		if (useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())		for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))		if (useSVEForFixedLengthVectorVT(VT))
▲ Show 20 Lines • Show All 646 Lines • ▼ Show 20 Lines	case AArch64ISD::FIRST_NUMBER:
MAKE_CASE(AArch64ISD::SST1_IMM_PRED)		MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_PRED)		MAKE_CASE(AArch64ISD::SSTNT1_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)		MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
MAKE_CASE(AArch64ISD::LDP)		MAKE_CASE(AArch64ISD::LDP)
MAKE_CASE(AArch64ISD::STP)		MAKE_CASE(AArch64ISD::STP)
MAKE_CASE(AArch64ISD::STNP)		MAKE_CASE(AArch64ISD::STNP)
MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)		MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::INDEX_VECTOR)		MAKE_CASE(AArch64ISD::INDEX_VECTOR)
		MAKE_CASE(AArch64ISD::FRINTP_PRED)
}		}
#undef MAKE_CASE		#undef MAKE_CASE
return nullptr;		return nullptr;
}		}

MachineBasicBlock *		MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,		AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *MBB) const {		MachineBasicBlock *MBB) const {
▲ Show 20 Lines • Show All 1,804 Lines • ▼ Show 20 Lines	SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),		AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},		{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());		StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;		return Result;
}		}

return SDValue();		return SDValue();
}		}
		static auto CreateNodeWithImplicitDef(SDValue Op, SelectionDAG &DAG){
		EVT VT = Op.getValueType();
		SDLoc DL(Op);
		auto NewOperand = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0);
		efriedmaUnsubmitted Not Done Reply Inline Actions We shouldn't be calling getMachineNode in legalization. The way to get an uninitialized value before isel is getUNDEF(). Also, this looks like it's creating an FCEIL with two operands. That's a bad idea; we have a bunch of assertions in getNode() to ensure SelectionDAG nodes are well-formed. Even if those assertions don't catch this issue right now, they might in the future. efriedma: We shouldn't be calling getMachineNode in legalization. The way to get an uninitialized value…
		return DAG.getNode(Op.getOpcode(), DL, VT, NewOperand, Op->getOperand(0));
		}

SDValue AArch64TargetLowering::LowerOperation(SDValue Op,		SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");		LLVM_DEBUG(dbgs() << "Custom lowering: ");
LLVM_DEBUG(Op.dump());		LLVM_DEBUG(Op.dump());

switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
default:		default:
llvm_unreachable("unimplemented operand");		llvm_unreachable("unimplemented operand");
return SDValue();		return SDValue();
case ISD::BITCAST:		case ISD::BITCAST:
return LowerBITCAST(Op, DAG);		return LowerBITCAST(Op, DAG);
case ISD::GlobalAddress:		case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);		return LowerGlobalAddress(Op, DAG);
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines	if (Op.getValueType() == MVT::f128)
return LowerF128Call(Op, DAG, RTLIB::MUL_F128);		return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
case ISD::FMA:		case ISD::FMA:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
case ISD::FDIV:		case ISD::FDIV:
if (Op.getValueType() == MVT::f128)		if (Op.getValueType() == MVT::f128)
return LowerF128Call(Op, DAG, RTLIB::DIV_F128);		return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
		case ISD::FCEIL:
		return LowerToPredicatedOp(CreateNodeWithImplicitDef(Op, DAG), DAG, AArch64ISD::FRINTP_PRED, true);
case ISD::FP_ROUND:		case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:		case ISD::STRICT_FP_ROUND:
return LowerFP_ROUND(Op, DAG);		return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND:		case ISD::FP_EXTEND:
return LowerFP_EXTEND(Op, DAG);		return LowerFP_EXTEND(Op, DAG);
case ISD::FRAMEADDR:		case ISD::FRAMEADDR:
return LowerFRAMEADDR(Op, DAG);		return LowerFRAMEADDR(Op, DAG);
case ISD::SPONENTRY:		case ISD::SPONENTRY:
Show All 16 Lines	case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);		return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:		case ISD::INSERT_SUBVECTOR:
return LowerINSERT_SUBVECTOR(Op, DAG);		return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:		case ISD::SDIV:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED);
case ISD::UDIV:		case ISD::UDIV:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED);
case ISD::SMIN:		case ISD::SMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1);
		paulwalker-armUnsubmitted Not Done Reply Inline Actions To make the relationship clearer between the original nodes and their predicated counterparts we just add a suffix. So in this case the predicated node should be named FCEIL_PRED. paulwalker-arm: To make the relationship clearer between the original nodes and their predicated counterparts…
case ISD::UMIN:		case ISD::UMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_MERGE_OP1);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_MERGE_OP1);
case ISD::SMAX:		case ISD::SMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_MERGE_OP1);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_MERGE_OP1);
case ISD::UMAX:		case ISD::UMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_MERGE_OP1);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_MERGE_OP1);
case ISD::SRA:		case ISD::SRA:
case ISD::SRL:		case ISD::SRL:
▲ Show 20 Lines • Show All 4,379 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);		SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);		SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);

// create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...		// create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...
SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);		SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
return DAG.getNode(ISD::BITCAST, DL, VT, TBL);		return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
}		}

		// Re-emits the node N as a three-operand node with opcode Opc.
		// Operands 1, 2 and 3 of N are forwarded unchanged and in that order;
		// operand 0 is not forwarded. The result type is N's first result type.
		// NOTE(review): presumably operand 0 is the intrinsic ID and operands
		// 1..3 are (passthru, predicate, source) -- confirm against the caller.
		static SDValue combineSVEPredIntrinsic(unsigned Opc, SDNode *N, SelectionDAG &DAG) {
		  SDLoc Loc(N);
		  const EVT ResultVT = N->getValueType(0);
		  return DAG.getNode(Opc, Loc, ResultVT, N->getOperand(1), N->getOperand(2),
		                     N->getOperand(3));
		}

static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,		static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
APInt &UndefBits) {		APInt &UndefBits) {
EVT VT = BVN->getValueType(0);		EVT VT = BVN->getValueType(0);
APInt SplatBits, SplatUndef;		APInt SplatBits, SplatUndef;
unsigned SplatBitSize;		unsigned SplatBitSize;
bool HasAnyUndefs;		bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {		if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
▲ Show 20 Lines • Show All 4,166 Lines • ▼ Show 20 Lines	case Intrinsic::aarch64_sve_ptest_any:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),		return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::ANY_ACTIVE);		AArch64CC::ANY_ACTIVE);
case Intrinsic::aarch64_sve_ptest_first:		case Intrinsic::aarch64_sve_ptest_first:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),		return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::FIRST_ACTIVE);		AArch64CC::FIRST_ACTIVE);
case Intrinsic::aarch64_sve_ptest_last:		case Intrinsic::aarch64_sve_ptest_last:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),		return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::LAST_ACTIVE);		AArch64CC::LAST_ACTIVE);
		case Intrinsic::aarch64_sve_frintp:
		paulwalker-armUnsubmitted Not Done Reply Inline Actions As suggested by Eli you only need to implement the FRINTP_PRED variant, which doesn't set any expectation on the result of inactive lanes. Doing this means the intrinsics can remain untouched. FYI: I'm in the process of converting the last few remaining instances of _MERGE_OP1 nodes (only the shifts and max/min remain) because we have no real need for them as yet and I'm trying to ensure we don't tie the register allocator's hands when code generating normal IR. paulwalker-arm: As suggested by Eli you only need to implement the FRINTP_PRED variant, which doesn't set any…
		return combineSVEPredIntrinsic(AArch64ISD::FRINTP_PRED, N, DAG);
}		}
return SDValue();		return SDValue();
}		}

static SDValue performExtendCombine(SDNode *N,		static SDValue performExtendCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
// If we see something like (zext (sabd (extract_high ...), (DUP ...))) then		// If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
▲ Show 20 Lines • Show All 3,050 Lines • ▼ Show 20 Lines	case MVT::nxv8i16:
break;		break;
}		}

return convertFromScalableVector(DAG, VT, Val);		return convertFromScalableVector(DAG, VT, Val);
}		}

SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,		SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SelectionDAG &DAG,		SelectionDAG &DAG,
unsigned NewOp) const {		unsigned NewOp,
		bool Merging) const {
EVT VT = Op.getValueType();		EVT VT = Op.getValueType();
SDLoc DL(Op);		SDLoc DL(Op);
auto Pg = getPredicateForVector(DAG, DL, VT);		auto Pg = getPredicateForVector(DAG, DL, VT);

if (useSVEForFixedLengthVectorVT(VT)) {		if (useSVEForFixedLengthVectorVT(VT)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);		EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

// Create list of operands by converting existing ones to scalable types.		// Create list of operands by converting existing ones to scalable types.
Show All 10 Lines	if (useSVEForFixedLengthVectorVT(VT)) {
}		}

auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);		auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
return convertFromScalableVector(DAG, VT, ScalableRes);		return convertFromScalableVector(DAG, VT, ScalableRes);
}		}

assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");		assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");

SmallVector<SDValue, 4> Operands = {Pg};		SmallVector<SDValue, 3> Operands;
for (const SDValue &V : Op->op_values()) {		for (const SDValue &V : Op->op_values()) {
		paulwalker-armUnsubmitted Not Done Reply Inline Actions I don't understand why you need this change. By definition the _PRED nodes should take the form: ISDNODE Op1, Op2...OpN -> ISDNODE_PRED Pg, Op1, Op2...OpN I would expect this function to do what's required without any changes. I suspect any issues are likely down to mistakes within the isel patterns. paulwalker-arm: I don't understand why you need this change. By definition the _PRED nodes should take the…
assert((isa<CondCodeSDNode>(V) \|\| V.getValueType().isScalableVector()) &&		assert((isa<CondCodeSDNode>(V) \|\| V.getValueType().isScalableVector()) &&
"Only scalable vectors are supported!");		"Only scalable vectors are supported!");
Operands.push_back(V);		Operands.push_back(V);
}		}
		if (Merging)
		Operands.insert(Operands.begin() + 1, Pg);
		else
		Operands.insert(Operands.begin(), Pg);

return DAG.getNode(NewOp, DL, VT, Operands);		return DAG.getNode(NewOp, DL, VT, Operands);
}		}
		No newline at end of file

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show First 20 Lines • Show All 156 Lines • ▼ Show 20 Lines
def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;		def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
def AArch64uminv_p : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;		def AArch64uminv_p : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;
def AArch64orv_p : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;		def AArch64orv_p : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;
def AArch64eorv_p : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;		def AArch64eorv_p : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;
def AArch64andv_p : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;		def AArch64andv_p : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;
def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;		def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;
def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;		def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;

def SDT_AArch64Arith : SDTypeProfile<1, 3, [		def SDT_AArch64Arith : SDTypeProfile<1, 3, [
		paulwalker-armUnsubmitted Not Done Reply Inline Actions This is incorrect because the unary _PRED nodes should have 2 operands. The predicate followed by the data operand. See SDT_AArch64Arith for inspiration, where you just need to drop the stuff related to Op3. paulwalker-arm: This is incorrect because the unary _PRED nodes should have 2 operands. The predicate followed…
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>		SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>
]>;		]>;

def SDT_AArch64FMA : SDTypeProfile<1, 4, [		def SDT_AArch64FMA : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>		SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>
]>;		]>;

// Predicated operations with the result of inactive lanes being unspecified.		// Predicated operations with the result of inactive lanes being unspecified.
def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;		def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;
def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;		def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;		def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;
def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>;		def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>;
def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>;		def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>;
def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;		def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;
def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;		def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;		def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;

// Merging op1 into the inactive lanes.		// Merging op1 into the inactive lanes.
def AArch64smin_m1 : SDNode<"AArch64ISD::SMIN_MERGE_OP1", SDT_AArch64Arith>;		def AArch64smin_m1 : SDNode<"AArch64ISD::SMIN_MERGE_OP1", SDT_AArch64Arith>;
		paulwalker-armUnsubmitted Not Done Reply Inline Actions I've tried to use the AArch64 names here, so AArch64frintp_mt, as it makes it easy to spot mismatches with the instruction definitions. paulwalker-arm: I've tried to use the AArch64 names here, so AArch64frintp_mt, as it makes it easy to spot…
def AArch64umin_m1 : SDNode<"AArch64ISD::UMIN_MERGE_OP1", SDT_AArch64Arith>;		def AArch64umin_m1 : SDNode<"AArch64ISD::UMIN_MERGE_OP1", SDT_AArch64Arith>;
def AArch64smax_m1 : SDNode<"AArch64ISD::SMAX_MERGE_OP1", SDT_AArch64Arith>;		def AArch64smax_m1 : SDNode<"AArch64ISD::SMAX_MERGE_OP1", SDT_AArch64Arith>;
def AArch64umax_m1 : SDNode<"AArch64ISD::UMAX_MERGE_OP1", SDT_AArch64Arith>;		def AArch64umax_m1 : SDNode<"AArch64ISD::UMAX_MERGE_OP1", SDT_AArch64Arith>;
def AArch64lsl_m1 : SDNode<"AArch64ISD::SHL_MERGE_OP1", SDT_AArch64Arith>;		def AArch64lsl_m1 : SDNode<"AArch64ISD::SHL_MERGE_OP1", SDT_AArch64Arith>;
def AArch64lsr_m1 : SDNode<"AArch64ISD::SRL_MERGE_OP1", SDT_AArch64Arith>;		def AArch64lsr_m1 : SDNode<"AArch64ISD::SRL_MERGE_OP1", SDT_AArch64Arith>;
def AArch64asr_m1 : SDNode<"AArch64ISD::SRA_MERGE_OP1", SDT_AArch64Arith>;		def AArch64asr_m1 : SDNode<"AArch64ISD::SRA_MERGE_OP1", SDT_AArch64Arith>;

def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;		def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
Show All 10 Lines
def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 3>, SDTCisVec<1>, SDTCVecEltisVT<1,i1>]>;		def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 3>, SDTCisVec<1>, SDTCVecEltisVT<1,i1>]>;
def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED>;		def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED>;

def SDT_IndexVector : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<2>]>;		def SDT_IndexVector : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<2>]>;
def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;		def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;

def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;		def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;

		def SDT_AArch64PredUnFp : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>]>;
		def AArch64frintp : SDNode<"AArch64ISD::FRINTP_PRED", SDT_AArch64PredUnFp>;

let Predicates = [HasSVE] in {		let Predicates = [HasSVE] in {
defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;		defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;		def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
defm RDFFR_P : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;		defm RDFFR_P : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
def SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;		def SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;		def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;

defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add", add, null_frag>;		defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add", add, null_frag>;
▲ Show 20 Lines • Show All 1,142 Lines • ▼ Show 20 Lines	multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, int scale, ComplexPattern AddrCP> {
defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;		defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;		defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;
defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;		defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;		defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;
defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;		defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;		defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;

defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>;		defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>;
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp>;		defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp>;
defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>;		defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>;
defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>;		defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>;
defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>;		defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>;
defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", int_aarch64_sve_frintx>;		defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", int_aarch64_sve_frintx>;
defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", int_aarch64_sve_frinti>;		defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", int_aarch64_sve_frinti>;
defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;		defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;		defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;

▲ Show 20 Lines • Show All 1,238 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-fp.ll

	Show First 20 Lines • Show All 402 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]			; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
	; CHECK-NEXT: st1w { z0.s }, p0, [x1]			; CHECK-NEXT: st1w { z0.s }, p0, [x1]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%A = load <vscale x 4 x float>, <vscale x 4 x float>* %P1, align 16			%A = load <vscale x 4 x float>, <vscale x 4 x float>* %P1, align 16
	store <vscale x 4 x float> %A, <vscale x 4 x float>* %P2, align 16			store <vscale x 4 x float> %A, <vscale x 4 x float>* %P2, align 16
	ret void			ret void
	}			}

				; FCEIL

				define <vscale x 8 x half> @frintp_nxv8f16(<vscale x 8 x half> %a) {
				paulwalker-armUnsubmitted Not Done Reply Inline Actions You'll need tests for the other legal floating point MVTs (i.e. nxv2f16, nxv4f16 and nxv2f32). paulwalker-arm: You'll need tests for the other legal floating point MVTs (i.e. nxv2f16, nxv4f16 and nxv2f32).
				; CHECK-LABEL: frintp_nxv8f16:
				; CHECK: ptrue p0.h
				; CHECK-NEXT: frintp z0.h, p0/m, z0.h
				; CHECK-NEXT: ret
				%res = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %a)
				ret <vscale x 8 x half> %res

				}

				define <vscale x 4 x float> @frintp_nxv4f32(<vscale x 4 x float> %a) {
				; CHECK-LABEL: frintp_nxv4f32:
				; CHECK: ptrue p0.s
				; CHECK-NEXT: frintp z0.s, p0/m, z0.s
				; CHECK-NEXT: ret
				%res = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %a)
				ret <vscale x 4 x float> %res

				}

				define <vscale x 2 x double> @frintp_nxv2f64(<vscale x 2 x double> %a) {
				; CHECK-LABEL: frintp_nxv2f64:
				; CHECK: ptrue p0.d
				; CHECK-NEXT: frintp z0.d, p0/m, z0.d
				; CHECK-NEXT: ret
				%res = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %a)
				ret <vscale x 2 x double> %res

				}

	declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)			declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
	declare <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> , <vscale x 4 x float>)			declare <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> , <vscale x 4 x float>)
	declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)			declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

	declare <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)			declare <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
	declare <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)			declare <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
	declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)			declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

	declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)			declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
	declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)			declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
	declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)			declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
	declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)			declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
	declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)			declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
	declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)			declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)

				declare <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half>)
				declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)
				declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)

	; Function Attrs: nounwind readnone			; Function Attrs: nounwind readnone
	declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2			declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add lowering for llvm fceil
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 280558

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/test/CodeGen/AArch64/sve-fp.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add lowering for llvm fceilClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 280558

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/test/CodeGen/AArch64/sve-fp.ll

[AArch64][SVE] Add lowering for llvm fceil
ClosedPublic