Diff 401465

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,826 Lines • ▼ Show 20 Lines	default:
break;		break;
case AArch64ISD::CSEL: {		case AArch64ISD::CSEL: {
KnownBits Known2;		KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);		Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);		Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known = KnownBits::commonBits(Known, Known2);		Known = KnownBits::commonBits(Known, Known2);
break;		break;
}		}
		case AArch64ISD::BICi: {
		// Compute the bit cleared value.
		uint64_t Mask =
		~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
		dmgreenUnsubmitted Not Done Reply Inline Actions We don't appear to use Known2 and Known3 dmgreen: We don't appear to use Known2 and Known3
		Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
		Known &= KnownBits::makeConstant(APInt(Known.getBitWidth(), Mask));
		dmgreenUnsubmitted Not Done Reply Inline Actions Is this chunk needed? I think the operands should be constants by definition. dmgreen: Is this chunk needed? I think the operands should be constants by definition.
		break;
		}
		case AArch64ISD::VLSHR: {
		KnownBits Known2;
		dmgreenUnsubmitted Not Done Reply Inline Actions We know that the operands 1 and 2 will be constants, so I think we can just grab the values for them directly: uint64_t Mask = ~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2)); The Mask then specifies which bits are known to be 0. dmgreen: We know that the operands 1 and 2 will be constants, so I think we can just grab the values for…
		Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
		Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
		Known = KnownBits::lshr(Known, Known2);
		break;
		}
		case AArch64ISD::VASHR: {
		KnownBits Known2;
		Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
		Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
		Known = KnownBits::ashr(Known, Known2);
		break;
		}
case AArch64ISD::LOADgot:		case AArch64ISD::LOADgot:
case AArch64ISD::ADDlow: {		case AArch64ISD::ADDlow: {
if (!Subtarget->isTargetILP32())		if (!Subtarget->isTargetILP32())
break;		break;
// In ILP32 mode all valid pointers are in the low 4GB of the address-space.		// In ILP32 mode all valid pointers are in the low 4GB of the address-space.
Known.Zero = APInt::getHighBitsSet(64, 32);		Known.Zero = APInt::getHighBitsSet(64, 32);
break;		break;
}		}
▲ Show 20 Lines • Show All 18,009 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64InstrInfo.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,417 Lines • ▼ Show 20 Lines

// A scalar sqdmull with the second operand being a vector lane can be		// A scalar sqdmull with the second operand being a vector lane can be
// handled directly with the indexed instruction encoding.		// handled directly with the indexed instruction encoding.
def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),		def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
(vector_extract (v4i32 V128:$Vm),		(vector_extract (v4i32 V128:$Vm),
VectorIndexS:$idx)),		VectorIndexS:$idx)),
(SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;		(SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;

		// Match an 'add' node, and also treat an 'or' node as an 'add' when the
		// or'ed operands have no common bits set.
		def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
		[(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
		// A real ISD::ADD always matches; otherwise the node is accepted only if
		// its two operands are known to share no set bits.
		if (N->getOpcode() == ISD::ADD)
		return true;
		return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
		}]> {
		let GISelPredicateCode = [{
		// Only handle G_ADD for now.
		// FIXME: build the capability to compute whether the operands of a G_OR
		// have common bits set or not.
		return MI.getOpcode() == TargetOpcode::G_ADD;
		}];
		}


//----------------------------------------------------------------------------		//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions		// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------		//----------------------------------------------------------------------------
defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;		defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;		defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">;		defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;		defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
// Codegen patterns for the above. We don't put these directly on the		// Codegen patterns for the above. We don't put these directly on the
▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines	defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
int_aarch64_neon_sqshrun>;		int_aarch64_neon_sqshrun>;
defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">;		defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">;
defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>;		defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra",		defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra",
TriOpFrag<(add node:$LHS,		TriOpFrag<(add node:$LHS,
(AArch64srshri node:$MHS, node:$RHS))>>;		(AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>;		defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>;
defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra",		defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra",
TriOpFrag<(add node:$LHS,		TriOpFrag<(add_and_or_is_add node:$LHS,
(AArch64vashr node:$MHS, node:$RHS))>>;		(AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",		defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
int_aarch64_neon_uqrshrn>;		int_aarch64_neon_uqrshrn>;
defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;		defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",		defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
int_aarch64_neon_uqshrn>;		int_aarch64_neon_uqshrn>;
defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>;		defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>;
defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",		defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",
TriOpFrag<(add node:$LHS,		TriOpFrag<(add node:$LHS,
(AArch64urshri node:$MHS, node:$RHS))>>;		(AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>;		defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>;
defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra",		defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra",
TriOpFrag<(add node:$LHS,		TriOpFrag<(add_and_or_is_add node:$LHS,
(AArch64vlshr node:$MHS, node:$RHS))>>;		(AArch64vlshr node:$MHS, node:$RHS))>>;

//----------------------------------------------------------------------------		//----------------------------------------------------------------------------
// AdvSIMD vector shift instructions		// AdvSIMD vector shift instructions
//----------------------------------------------------------------------------		//----------------------------------------------------------------------------
defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;		defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;		defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",		defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
Show All 25 Lines
defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",		defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
TriOpFrag<(add node:$LHS,		TriOpFrag<(add node:$LHS,
(AArch64srshri node:$MHS, node:$RHS))> >;		(AArch64srshri node:$MHS, node:$RHS))> >;
defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",		defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;		BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;

defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;		defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",		defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;		TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",		defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
int_aarch64_neon_vcvtfxu2fp>;		int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",		defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
int_aarch64_neon_uqrshrn>;		int_aarch64_neon_uqrshrn>;
defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;		defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",		defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
int_aarch64_neon_uqshrn>;		int_aarch64_neon_uqshrn>;
defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;		defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",		defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
TriOpFrag<(add node:$LHS,		TriOpFrag<(add node:$LHS,
(AArch64urshri node:$MHS, node:$RHS))> >;		(AArch64urshri node:$MHS, node:$RHS))> >;
defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",		defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;		BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;		defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",		defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;		TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;

// RADDHN patterns for when RSHRN shifts by half the size of the vector element		// RADDHN patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),		def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
(RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;		(RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),		def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),
(RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;		(RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),		def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
(RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;		(RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
▲ Show 20 Lines • Show All 1,731 Lines • ▼ Show 20 Lines	def StoreSwiftAsyncContext
: Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),		: Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
[]>, Sched<[]>;		[]>, Sched<[]>;

def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;		def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),		def : Pat<(AArch64AssertZExtBool GPR32:$op),
(i32 GPR32:$op)>;		(i32 GPR32:$op)>;

include "AArch64InstrAtomics.td"		include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"		include "AArch64SVEInstrInfo.td"
		dmgreenUnsubmitted Not Done Reply Inline Actions Now that we have this, it may be worth using it in the existing patterns, using a PatFrags that accepts either "add" or "or_is_add". That would save the need for new patterns. dmgreen: Now that we have this, it may be worth using it in the existing patterns, using a PatFrags that…
include "AArch64SMEInstrInfo.td"		include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"		include "AArch64InstrGISel.td"

llvm/test/CodeGen/AArch64/shift-accumulate.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s -mtriple=aarch64-none-eabi \| FileCheck %s
				dmgreenUnsubmitted Not Done Reply Inline Actions Can you run the update_llc_test_checks on the file? It's missing some expected output. It's also worth adding some simple case for each signedness / type that have tablegen patterns added / changed. dmgreen: Can you run the update_llc_test_checks on the file? It's missing some expected output. It's…

				dmgreenUnsubmitted Not Done Reply Inline Actions I tend to use -mtriple=aarch64-none-eabi dmgreen: I tend to use -mtriple=aarch64-none-eabi
				define <4 x i16> @usra_v4i16(<8 x i8> %0) {
				; CHECK-LABEL: usra_v4i16:
				dmgreenUnsubmitted Not Done Reply Inline Actions You can usually remove dso_local and local_unnamed_addr #0 align 32, to make the tests a little cleaner. dmgreen: You can usually remove dso_local and local_unnamed_addr #0 align 32, to make the tests a little…
				; CHECK: // %bb.0:
				; CHECK-NEXT: ushr v0.8b, v0.8b, #7
				; CHECK-NEXT: usra v0.4h, v0.4h, #7
				; CHECK-NEXT: ret
				%2 = lshr <8 x i8> %0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
				%3 = bitcast <8 x i8> %2 to <4 x i16>
				%4 = lshr <4 x i16> %3, <i16 7, i16 7, i16 7, i16 7>
				%5 = or <4 x i16> %4, %3
				dmgreenUnsubmitted Not Done Reply Inline Actions It's probably a better test to return the %5, not extract a single lane from it. dmgreen: It's probably a better test to return the %5, not extract a single lane from it.
				ret <4 x i16> %5
				}

				define <4 x i32> @usra_v4i32(<8 x i16> %0) {
				; CHECK-LABEL: usra_v4i32:
				; CHECK: // %bb.0:
				dmgreenUnsubmitted Not Done Reply Inline Actions Unfortunately this doesn't verify. I think because the BIC code is giving incorrect Known bits. dmgreen: Unfortunately this doesn't verify. I think because the BIC code is giving incorrect Known bits.
				; CHECK-NEXT: ushr v0.8h, v0.8h, #15
				; CHECK-NEXT: usra v0.4s, v0.4s, #15
				; CHECK-NEXT: ret
				%2 = lshr <8 x i16> %0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
				%3 = bitcast <8 x i16> %2 to <4 x i32>
				%4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
				%5 = or <4 x i32> %4, %3
				ret <4 x i32> %5
				}

				define <2 x i64> @usra_v2i64(<4 x i32> %0) {
				; CHECK-LABEL: usra_v2i64:
				dmgreenUnsubmitted Not Done Reply Inline Actions Can you add the <1 x i64> version of this test (and ssra_v2i64 too). It should then test the "scalar" instructions with "d0" and "d1" operands. You can remove dso_local too. dmgreen: Can you add the <1 x i64> version of this test (and ssra_v2i64 too). It should then test the…
				; CHECK: // %bb.0:
				; CHECK-NEXT: ushr v0.4s, v0.4s, #31
				; CHECK-NEXT: usra v0.2d, v0.2d, #31
				; CHECK-NEXT: ret
				%2 = lshr <4 x i32> %0, <i32 31, i32 31, i32 31, i32 31>
				%3 = bitcast <4 x i32> %2 to <2 x i64>
				%4 = lshr <2 x i64> %3, <i64 31, i64 31>
				%5 = or <2 x i64> %4, %3
				ret <2 x i64> %5
				}

				define <1 x i64> @usra_v1i64(<2 x i32> %0) {
				; CHECK-LABEL: usra_v1i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ushr v0.2s, v0.2s, #31
				; CHECK-NEXT: usra d0, d0, #31
				; CHECK-NEXT: ret
				%2 = lshr <2 x i32> %0, <i32 31, i32 31>
				%3 = bitcast <2 x i32> %2 to <1 x i64>
				%4 = lshr <1 x i64> %3, <i64 31>
				%5 = or <1 x i64> %4, %3
				ret <1 x i64> %5
				}

				define <4 x i16> @ssra_v4i16(<4 x i16> %0) {
				; CHECK-LABEL: ssra_v4i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ushr v1.4h, v0.4h, #15
				; CHECK-NEXT: bic v0.4h, #64, lsl #8
				; CHECK-NEXT: ssra v1.4h, v0.4h, #14
				; CHECK-NEXT: fmov d0, d1
				; CHECK-NEXT: ret
				; Clear bit 14 (the second-most-significant bit) of each lane; 49151 is
				; 0xBFFF. e.g. 0b1111111111111111 -> 0b1011111111111111
				%2 = and <4 x i16> %0, <i16 49151, i16 49151,i16 49151,i16 49151>
				; Logical shift right by 15: the upper 15 bits are zero and only the
				; lowest bit can be zero or one. e.g. 0b1011111111111111 -> 0b0000000000000001
				%3 = lshr <4 x i16> %0, <i16 15, i16 15, i16 15, i16 15>
				; Arithmetic shift right by 14: the upper 15 bits may be one (copies of
				; the sign bit) and the lowest bit is zero, because bit 14 was cleared
				; above. e.g. 0b1011111111111111 -> 0b1111111111111110
				%4 = ashr <4 x i16> %2, <i16 14, i16 14, i16 14, i16 14>
				; %3 can only set bit 0 and %4 never sets bit 0, so the operands of this
				; 'or' have no common bits set.
				%5 = or <4 x i16> %3, %4
				ret <4 x i16> %5
				}

				define <4 x i32> @ssra_v4i32(<4 x i32> %0) {
				; CHECK-LABEL: ssra_v4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ushr v1.4s, v0.4s, #31
				; CHECK-NEXT: bic v0.4s, #64, lsl #24
				; CHECK-NEXT: ssra v1.4s, v0.4s, #30
				; CHECK-NEXT: mov v0.16b, v1.16b
				; CHECK-NEXT: ret
				; Clear bit 30 (the second-most-significant bit) of each lane; 3221225471
				; is 0xBFFFFFFF.
				%2 = and <4 x i32> %0, <i32 3221225471, i32 3221225471,i32 3221225471,i32 3221225471>
				; Logical shift right by 31: the upper 31 bits are zero and only the
				; lowest bit can be zero or one.
				%3 = lshr <4 x i32> %0, <i32 31, i32 31, i32 31, i32 31>
				; Arithmetic shift right by 30: the upper 31 bits may be one (copies of
				; the sign bit) and the lowest bit is zero, because bit 30 was cleared
				; above.
				%4 = ashr <4 x i32> %2, <i32 30, i32 30, i32 30, i32 30>
				; %3 can only set bit 0 and %4 never sets bit 0, so the operands of this
				; 'or' have no common bits set.
				%5 = or <4 x i32> %3, %4
				ret <4 x i32> %5
				}

				define <1 x i64> @ssra_v1i64(<2 x i32> %0) {
				; CHECK-LABEL: ssra_v1i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: bic v0.2s, #64, lsl #24
				; CHECK-NEXT: ushr d1, d0, #63
				; CHECK-NEXT: ssra d1, d0, #62
				; CHECK-NEXT: fmov d0, d1
				; CHECK-NEXT: ret
				%2 = and <2 x i32> %0, <i32 3221225471, i32 3221225471>
				%3 = bitcast <2 x i32> %2 to <1 x i64>
				%4 = lshr <1 x i64> %3, <i64 63>
				%5 = ashr <1 x i64> %3, <i64 62>
				%6 = or <1 x i64> %4, %5
				ret <1 x i64> %6
				}

				define <2 x i64> @ssra_v2i64(<4 x i32> %0) {
				; CHECK-LABEL: ssra_v2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: bic v0.4s, #64, lsl #24
				; CHECK-NEXT: ushr v1.2d, v0.2d, #63
				; CHECK-NEXT: ssra v1.2d, v0.2d, #62
				; CHECK-NEXT: mov v0.16b, v1.16b
				; CHECK-NEXT: ret
				%2 = and <4 x i32> %0, <i32 3221225471, i32 3221225471,i32 3221225471,i32 3221225471>
				%3 = bitcast <4 x i32> %2 to <2 x i64>
				%4 = lshr <2 x i64> %3, <i64 63, i64 63>
				%5 = ashr <2 x i64> %3, <i64 62, i64 62>
				%6 = or <2 x i64> %4, %5
				ret <2 x i64> %6
				}

This is an archive of the discontinued LLVM Phabricator instance.

Optimize shift and accumulate pattern in AArch64.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 401465

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64InstrInfo.td

llvm/test/CodeGen/AArch64/shift-accumulate.ll

This is an archive of the discontinued LLVM Phabricator instance.

Optimize shift and accumulate pattern in AArch64.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 401465

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64InstrInfo.td

llvm/test/CodeGen/AArch64/shift-accumulate.ll

Optimize shift and accumulate pattern in AArch64.
ClosedPublic