Diff 257679

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 115 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {

// Vector shift by scalar (again)		// Vector shift by scalar (again)
SQSHL_I,		SQSHL_I,
UQSHL_I,		UQSHL_I,
SQSHLU_I,		SQSHLU_I,
SRSHR_I,		SRSHR_I,
URSHR_I,		URSHR_I,

		// Vector shift by constant and insert
		VSLI,
		VSRI,

// Vector comparisons		// Vector comparisons
CMEQ,		CMEQ,
CMGE,		CMGE,
CMGT,		CMGT,
CMHI,		CMHI,
CMHS,		CMHS,
FCMEQ,		FCMEQ,
FCMGE,		FCMGE,
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
/// need to re-interpret the data in SIMD vector registers in big-endian		/// need to re-interpret the data in SIMD vector registers in big-endian
/// mode without emitting such REV instructions.		/// mode without emitting such REV instructions.
NVCAST,		NVCAST,

SMULL,		SMULL,
UMULL,		UMULL,

// Reciprocal estimates and steps.		// Reciprocal estimates and steps.
FRECPE, FRECPS,		FRECPE,
FRSQRTE, FRSQRTS,		FRECPS,
		FRSQRTE,
		FRSQRTS,

SUNPKHI,		SUNPKHI,
SUNPKLO,		SUNPKLO,
UUNPKHI,		UUNPKHI,
UUNPKLO,		UUNPKLO,

CLASTA_N,		CLASTA_N,
CLASTB_N,		CLASTB_N,
▲ Show 20 Lines • Show All 666 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 93 Lines • ▼ Show 20 Lines
using namespace llvm::PatternMatch;		using namespace llvm::PatternMatch;

#define DEBUG_TYPE "aarch64-lower"		#define DEBUG_TYPE "aarch64-lower"

STATISTIC(NumTailCalls, "Number of tail calls");		STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");		STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");		STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");

static cl::opt<bool>
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
cl::desc("Allow AArch64 SLI/SRI formation"),
cl::init(false));

// FIXME: The necessary dtprel relocations don't seem to be supported		// FIXME: The necessary dtprel relocations don't seem to be supported
// well in the GNU bfd and gold linkers at the moment. Therefore, by		// well in the GNU bfd and gold linkers at the moment. Therefore, by
// default, for now, fall back to GeneralDynamic code generation.		// default, for now, fall back to GeneralDynamic code generation.
cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(		cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
"aarch64-elf-ldtls-generation", cl::Hidden,		"aarch64-elf-ldtls-generation", cl::Hidden,
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),		cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));		cl::init(false));

▲ Show 20 Lines • Show All 1,203 Lines • ▼ Show 20 Lines	const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::TRN2: return "AArch64ISD::TRN2";		case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
case AArch64ISD::REV16: return "AArch64ISD::REV16";		case AArch64ISD::REV16: return "AArch64ISD::REV16";
case AArch64ISD::REV32: return "AArch64ISD::REV32";		case AArch64ISD::REV32: return "AArch64ISD::REV32";
case AArch64ISD::REV64: return "AArch64ISD::REV64";		case AArch64ISD::REV64: return "AArch64ISD::REV64";
case AArch64ISD::EXT: return "AArch64ISD::EXT";		case AArch64ISD::EXT: return "AArch64ISD::EXT";
case AArch64ISD::VSHL: return "AArch64ISD::VSHL";		case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";		case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
case AArch64ISD::VASHR: return "AArch64ISD::VASHR";		case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
		case AArch64ISD::VSLI: return "AArch64ISD::VSLI";
		Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - case AArch64ISD::VSLI: return "AArch64ISD::VSLI"; - case AArch64ISD::VSRI: return "AArch64ISD::VSRI"; + case AArch64ISD::VSLI: + return "AArch64ISD::VSLI"; + case AArch64ISD::VSRI: + return "AArch64ISD::VSRI"; Lint: Pre-merge checks: clang-format: please reformat the code ``` - case AArch64ISD::VSLI: return…
		case AArch64ISD::VSRI: return "AArch64ISD::VSRI";
case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";		case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
case AArch64ISD::CMGE: return "AArch64ISD::CMGE";		case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
case AArch64ISD::CMGT: return "AArch64ISD::CMGT";		case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
case AArch64ISD::CMHI: return "AArch64ISD::CMHI";		case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
case AArch64ISD::CMHS: return "AArch64ISD::CMHS";		case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";		case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";		case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";		case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
▲ Show 20 Lines • Show All 1,806 Lines • ▼ Show 20 Lines	case Intrinsic::eh_recoverfp: {
SDValue IncomingFPOp = Op.getOperand(2);		SDValue IncomingFPOp = Op.getOperand(2);
GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);		GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);		auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
if (!Fn)		if (!Fn)
report_fatal_error(		report_fatal_error(
"llvm.eh.recoverfp must take a function as the first argument");		"llvm.eh.recoverfp must take a function as the first argument");
return IncomingFPOp;		return IncomingFPOp;
}		}

		case Intrinsic::aarch64_neon_vsri:
		case Intrinsic::aarch64_neon_vsli: {
		EVT Ty = Op.getValueType();

		if (!Ty.isVector())
		report_fatal_error("Unexpected type for aarch64_neon_vsli");

		uint64_t ShiftAmount = Op.getConstantOperandVal(3);
		unsigned ElemSizeInBits = Ty.getScalarSizeInBits();
		assert(ShiftAmount <= ElemSizeInBits);

		bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
		dmgreen (Unsubmitted, Done): You could possibly use uint64_t ShiftAmount = Op.getConstantOperandVal(3) to simplify it a little. It won't handle the report_fatal_error, but this should have already been checked at a higher level, somewhere in clang, to make sure the shift amount is a constant. The same goes for the ElemSizeInBits check below. You could change them to asserts. Up to you.
		unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
		return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
		Op.getOperand(3));
		}
}		}
}		}

bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {		bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
return ExtVal.getValueType().isScalableVector();		return ExtVal.getValueType().isScalableVector();
}		}

// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.		// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
▲ Show 20 Lines • Show All 4,732 Lines • ▼ Show 20 Lines	if (IID < Intrinsic::num_intrinsics)
return IID;		return IID;
return Intrinsic::not_intrinsic;		return Intrinsic::not_intrinsic;
}		}
}		}
}		}

// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),		// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a		// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
// BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2.		// BUILD_VECTORs with constant element C1, C2 is a constant, and:
// Also, logical shift right -> sri, with the same structure.		// - for the SLI case: C1 == Ones(ElemSizeInBits) >> (ElemSizeInBits - C2)
		// - for the SRI case: C1 == Ones(ElemSizeInBits) << (ElemSizeInBits - C2)
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {		static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);

if (!VT.isVector())		if (!VT.isVector())
return SDValue();		return SDValue();

SDLoc DL(N);		SDLoc DL(N);

Show All 16 Lines	static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
if (!C2node)		if (!C2node)
return SDValue();		return SDValue();

// Is the and mask vector all constant?		// Is the and mask vector all constant?
uint64_t C1;		uint64_t C1;
if (!isAllConstantBuildVector(And.getOperand(1), C1))		if (!isAllConstantBuildVector(And.getOperand(1), C1))
return SDValue();		return SDValue();

// Is C1 == ~C2, taking into account how much one can shift elements of a		// Is C1 == Ones(ElemSizeInBits) << (ElemSizeInBits - C2) or
// particular size?		// C1 == Ones(ElemSizeInBits) >> (ElemSizeInBits - C2), taking into account
		// how much one can shift elements of a particular size?
uint64_t C2 = C2node->getZExtValue();		uint64_t C2 = C2node->getZExtValue();
unsigned ElemSizeInBits = VT.getScalarSizeInBits();		unsigned ElemSizeInBits = VT.getScalarSizeInBits();
if (C2 > ElemSizeInBits)		if (C2 > ElemSizeInBits)
return SDValue();		return SDValue();
unsigned ElemMask = (1 << ElemSizeInBits) - 1;		unsigned ElemMask = (1 << ElemSizeInBits) - 1;
if ((C1 & ElemMask) != (~C2 & ElemMask))		if (IsShiftRight) {
		if ((C1 & ElemMask) != ((ElemMask << (ElemSizeInBits - C2)) & ElemMask))
		return SDValue();
		} else {
		if ((C1 & ElemMask) != ((ElemMask >> (ElemSizeInBits - C2)) & ElemMask))
return SDValue();		return SDValue();
		}

SDValue X = And.getOperand(0);		SDValue X = And.getOperand(0);
SDValue Y = Shift.getOperand(0);		SDValue Y = Shift.getOperand(0);

unsigned Intrin =		unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
		dmgreen (Unsubmitted, Done): I know you didn't write this part, but it's generally not a great idea to lower via intrinsics if it can be helped. Can you add an ISel node, a lot like ARMISD::VSHLIMM, and plumb that through tablegen instead?
		PetreTudor (Author, Unsubmitted, Done): Sure! I will start working on this right away.
IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;		SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
SDValue ResultSLI =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
Shift.getOperand(1));

LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");		LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
LLVM_DEBUG(N->dump(&DAG));		LLVM_DEBUG(N->dump(&DAG));
LLVM_DEBUG(dbgs() << "into: \n");		LLVM_DEBUG(dbgs() << "into: \n");
LLVM_DEBUG(ResultSLI->dump(&DAG));		LLVM_DEBUG(ResultSLI->dump(&DAG));

++NumShiftInserts;		++NumShiftInserts;
return ResultSLI;		return ResultSLI;
}		}

SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,		SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))		// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
if (EnableAArch64SlrGeneration) {
if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))		if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
return Res;		return Res;
}

EVT VT = Op.getValueType();		EVT VT = Op.getValueType();

SDValue LHS = Op.getOperand(0);		SDValue LHS = Op.getOperand(0);
BuildVectorSDNode *BVN =		BuildVectorSDNode *BVN =
dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());		dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
if (!BVN) {		if (!BVN) {
// OR commutes, so try swapping the operands.		// OR commutes, so try swapping the operands.
▲ Show 20 Lines • Show All 5,993 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64InstrInfo.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 225 Lines • ▼ Show 20 Lines
	def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;			def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
	def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,			def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
	SDTCisInt<2>, SDTCisInt<3>]>;			SDTCisInt<2>, SDTCisInt<3>]>;
	def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;			def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
	def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,			def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
	SDTCisSameAs<0,2>, SDTCisInt<3>]>;			SDTCisSameAs<0,2>, SDTCisInt<3>]>;
	def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;			def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;

				def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
				SDTCisSameAs<0,1>,
				SDTCisSameAs<0,2>]>;

	def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;			def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
	def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;			def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
	def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;			def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
	def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,			def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
	SDTCisSameAs<0,2>]>;			SDTCisSameAs<0,2>]>;
	def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,			def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
	SDTCisSameAs<0,2>,			SDTCisSameAs<0,2>,
	SDTCisSameAs<0,3>]>;			SDTCisSameAs<0,3>]>;
	▲ Show 20 Lines • Show All 222 Lines • ▼ Show 20 Lines
	def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;			def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
	def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;			def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
	def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;			def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
	def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;			def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
	def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;			def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
	def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;			def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
	def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;			def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
	def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;			def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
				def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
				def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;

	def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;			def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;
	def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;			def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
	def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;			def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;

	def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;			def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
	def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;			def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
	def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;			def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
	▲ Show 20 Lines • Show All 5,343 Lines • ▼ Show 20 Lines
	defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;			defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
	defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",			defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
	int_aarch64_neon_vcvtfxs2fp>;			int_aarch64_neon_vcvtfxs2fp>;
	defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",			defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
	int_aarch64_neon_rshrn>;			int_aarch64_neon_rshrn>;
	defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;			defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
	defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",			defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
	BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;			BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
	defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>;			defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
	def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),			def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
	(i32 vecshiftL64:$imm))),			(i32 vecshiftL64:$imm))),
	(SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;			(SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
	defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",			defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
	int_aarch64_neon_sqrshrn>;			int_aarch64_neon_sqrshrn>;
	defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",			defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
	int_aarch64_neon_sqrshrun>;			int_aarch64_neon_sqrshrun>;
	defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;			defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
	defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;			defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
	defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",			defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
	int_aarch64_neon_sqshrn>;			int_aarch64_neon_sqshrn>;
	defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",			defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
	int_aarch64_neon_sqshrun>;			int_aarch64_neon_sqshrun>;
	defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>;			defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
	def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),			def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
	(i32 vecshiftR64:$imm))),			(i32 vecshiftR64:$imm))),
	(SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;			(SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
	defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;			defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
	defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",			defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
	TriOpFrag<(add node:$LHS,			TriOpFrag<(add node:$LHS,
	(AArch64srshri node:$MHS, node:$RHS))> >;			(AArch64srshri node:$MHS, node:$RHS))> >;
	defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",			defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
	BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;			BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;
	▲ Show 20 Lines • Show All 1,513 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll

	; RUN: llc < %s -aarch64-shift-insert-generation=true -mtriple=arm64-eabi -aarch64-neon-syntax=apple \| FileCheck %s			; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple \| FileCheck %s

	define void @testLeftGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {			define void @testLeftGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
	; CHECK-LABEL: testLeftGood:			; CHECK-LABEL: testLeftGood:
	; CHECK: sli.16b v0, v1, #3			; CHECK: sli.16b v0, v1, #3
				efriedma (Unsubmitted, Not Done): Please generate simple tests like this with update_llc_test_checks.py. This transform is simply wrong, as written: sli is not the vector version of bfi.
	%and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>			%and.i = and <16 x i8> %src1, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
	%vshl_n = shl <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>			%vshl_n = shl <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
	%result = or <16 x i8> %and.i, %vshl_n			%result = or <16 x i8> %and.i, %vshl_n
	store <16 x i8> %result, <16 x i8>* %dest, align 16			store <16 x i8> %result, <16 x i8>* %dest, align 16
	ret void			ret void
	}			}

	define void @testLeftBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {			define void @testLeftBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
	; CHECK-LABEL: testLeftBad:			; CHECK-LABEL: testLeftBad:
	; CHECK-NOT: sli			; CHECK-NOT: sli
	%and.i = and <16 x i8> %src1, <i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165>			%and.i = and <16 x i8> %src1, <i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165>
	%vshl_n = shl <16 x i8> %src2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>			%vshl_n = shl <16 x i8> %src2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
	%result = or <16 x i8> %and.i, %vshl_n			%result = or <16 x i8> %and.i, %vshl_n
	store <16 x i8> %result, <16 x i8>* %dest, align 16			store <16 x i8> %result, <16 x i8>* %dest, align 16
	ret void			ret void
	}			}

	define void @testRightGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {			define void @testRightGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
	; CHECK-LABEL: testRightGood:			; CHECK-LABEL: testRightGood:
	; CHECK: sri.16b v0, v1, #3			; CHECK: sri.16b v0, v1, #3
	%and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>			%and.i = and <16 x i8> %src1, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
	%vshl_n = lshr <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>			%vshl_n = lshr <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
	%result = or <16 x i8> %and.i, %vshl_n			%result = or <16 x i8> %and.i, %vshl_n
	store <16 x i8> %result, <16 x i8>* %dest, align 16			store <16 x i8> %result, <16 x i8>* %dest, align 16
	ret void			ret void
	}			}

	define void @testRightBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {			define void @testRightBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
	; CHECK-LABEL: testRightBad:			; CHECK-LABEL: testRightBad:
	; CHECK-NOT: sri			; CHECK-NOT: sri
	%and.i = and <16 x i8> %src1, <i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165>			%and.i = and <16 x i8> %src1, <i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165>
	%vshl_n = lshr <16 x i8> %src2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>			%vshl_n = lshr <16 x i8> %src2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
	%result = or <16 x i8> %and.i, %vshl_n			%result = or <16 x i8> %and.i, %vshl_n
	store <16 x i8> %result, <16 x i8>* %dest, align 16			store <16 x i8> %result, <16 x i8>* %dest, align 16
	ret void			ret void
	}			}

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Fix conditions for lowering to S[LR]I
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 257679

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64InstrInfo.td

llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Fix conditions for lowering to S[LR]I
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 257679

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64InstrInfo.td

llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll

[ARM] Fix conditions for lowering to S[LR]I
ClosedPublic