Diff 466404

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,747 Lines • ▼ Show 20 Lines	EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
LLVMContext &C, EVT VT) const {		LLVMContext &C, EVT VT) const {
if (!VT.isVector())		if (!VT.isVector())
return MVT::i32;		return MVT::i32;
if (VT.isScalableVector())		if (VT.isScalableVector())
return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());		return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();		return VT.changeVectorElementTypeToInteger();
}		}

		// isIntImmediate - This method tests to see if the node is a constant
		// operand. If so Imm will receive the value.
		static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
		if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
		Imm = C->getZExtValue();
		return true;
		}
		return false;
		}

		// Returns true when N has opcode Opc and its operand 1 is an integer
		// constant; in that case Imm receives the constant's zero-extended value.
		static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
		                                  uint64_t &Imm) {
		  if (N->getOpcode() != Opc)
		    return false;
		  return isIntImmediate(N->getOperand(1).getNode(), Imm);
		}

static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,		static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
const APInt &Demanded,		const APInt &Demanded,
TargetLowering::TargetLoweringOpt &TLO,		TargetLowering::TargetLoweringOpt &TLO,
unsigned NewOpc) {		unsigned NewOpc) {
uint64_t OldImm = Imm, NewImm, Enc;		uint64_t OldImm = Imm, NewImm, Enc;
uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;		uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;

// Return if the immediate is already all zeros, all ones, a bimm32 or a		// Return if the immediate is already all zeros, all ones, a bimm32 or a
▲ Show 20 Lines • Show All 14,936 Lines • ▼ Show 20 Lines	if (Elt0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, VecToExtend);		SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, VecToExtend);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Ext,		return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Ext,
SubvectorIdx);		SubvectorIdx);
}		}

return SDValue();		return SDValue();
}		}

		// Re-orders ADD(SHL(a, small), SHL(b, large)) into
		// ADD(SHL(b, large), SHL(a, small)).
		//
		// Returns the re-ordered ADD when N is an i32/i64 ISD::ADD whose two operands
		// are both SHL by a constant, the LHS shift amount is <= 4, the RHS shift
		// amount is > 4 and the LHS shift has a single use. Returns an empty SDValue
		// in every other case.
		static SDValue performAddCombineForShiftedOperands(SDNode *N,
		SelectionDAG &DAG) {
		// NOTE: Swapping LHS and RHS is not done for SUB, since SUB is not
		// commutative.
		if (N->getOpcode() != ISD::ADD)
		return SDValue();

		// Bail out when value type is not one of {i32, i64}, since AArch64 ADD with
		// shifted register is only available for i32 and i64.
		EVT VT = N->getValueType(0);
		if (VT != MVT::i32 && VT != MVT::i64)
		return SDValue();

		SDLoc DL(N);
		SDValue LHS = N->getOperand(0);
		SDValue RHS = N->getOperand(1);

		uint64_t LHSImm = 0, RHSImm = 0;
		// If both operands are shifted by an immediate, the LHS shift amount is at
		// most 4 and the RHS shift amount is greater than 4, swap LHS and RHS to put
		// the operand with the smaller shift amount on the RHS.
		//
		// On many AArch64 processors (Cortex A78, Neoverse N1/N2/V1, etc), ADD with
		// LSL shift (shift <= 4) has smaller latency and larger throughput than ADD
		dmgreenUnsubmitted Done Reply Inline Actions AND -> ADD dmgreen: AND -> ADD
		// with LSL (shift > 4). For the rest of processors, this is no-op for
		dmgreenUnsubmitted Done Reply Inline Actions `> 4` dmgreen: `> 4`
		// performance or correctness.
		dmgreenUnsubmitted Done Reply Inline Actions correctness ;) dmgreen: correctness ;)
		//
		// The swap is limited to an LHS shift with a single use.
		// NOTE(review): presumably a multi-use shift has to be emitted on its own
		// anyway, so re-ordering would not help - confirm.
		if (isOpcWithIntImmediate(LHS.getNode(), ISD::SHL, LHSImm) &&
		isOpcWithIntImmediate(RHS.getNode(), ISD::SHL, RHSImm) && LHSImm <= 4 &&
		RHSImm > 4 && LHS.hasOneUse())
		return DAG.getNode(ISD::ADD, DL, VT, RHS, LHS);

		return SDValue();
		}

static SDValue performAddSubCombine(SDNode *N,		static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
// Try to change sum of two reductions.		// Try to change sum of two reductions.
if (SDValue Val = performAddUADDVCombine(N, DAG))		if (SDValue Val = performAddUADDVCombine(N, DAG))
return Val;		return Val;
if (SDValue Val = performAddDotCombine(N, DAG))		if (SDValue Val = performAddDotCombine(N, DAG))
return Val;		return Val;
if (SDValue Val = performAddCSelIntoCSinc(N, DAG))		if (SDValue Val = performAddCSelIntoCSinc(N, DAG))
return Val;		return Val;
if (SDValue Val = performNegCSelCombine(N, DAG))		if (SDValue Val = performNegCSelCombine(N, DAG))
return Val;		return Val;
if (SDValue Val = performVectorAddSubExtCombine(N, DAG))		if (SDValue Val = performVectorAddSubExtCombine(N, DAG))
return Val;		return Val;
		// Try to put the operand with the smaller shift amount on the RHS of an ADD.
		if (SDValue Val = performAddCombineForShiftedOperands(N, DAG))
		return Val;

return performAddSubLongCombine(N, DCI, DAG);		return performAddSubLongCombine(N, DCI, DAG);
}		}

// Massage DAGs which we can use the high-half "long" operations on into		// Massage DAGs which we can use the high-half "long" operations on into
// something isel will recognize better. E.g.		// something isel will recognize better. E.g.
//		//
// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->		// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
▲ Show 20 Lines • Show All 6,031 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/logical_shifted_reg.ll

Show First 20 Lines • Show All 286 Lines • ▼ Show 20 Lines	other_exit:
ret void		ret void
ret:		ret:
ret void		ret void
}		}

define i64 @add_swap_rhs_lhs_i64(i64 %0, i64 %1) {		define i64 @add_swap_rhs_lhs_i64(i64 %0, i64 %1) {
		; Both add operands are single-use shifts; the shift by 3 is expected to be
		; folded into the add and the shift by 8 emitted as a separate lsl.
; CHECK-LABEL: add_swap_rhs_lhs_i64:		; CHECK-LABEL: add_swap_rhs_lhs_i64:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: lsl x8, x1, #3		; CHECK-NEXT: lsl x8, x0, #8
; CHECK-NEXT: add x0, x8, x0, lsl #8		; CHECK-NEXT: add x0, x8, x1, lsl #3
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%3 = shl i64 %0, 8		%3 = shl i64 %0, 8
%4 = shl i64 %1, 3		%4 = shl i64 %1, 3
%5 = add i64 %4, %3		%5 = add i64 %4, %3
ret i64 %5		ret i64 %5
}		}

define i64 @add_swap_no_op_i64(i64 %0, i64 %1, i64* %2) {		define i64 @add_swap_no_op_i64(i64 %0, i64 %1, i64* %2) {
		; The shift by 3 (%5) is also stored, so it has more than one use and the
		; expected output is the same before and after the change.
; CHECK-LABEL: add_swap_no_op_i64:		; CHECK-LABEL: add_swap_no_op_i64:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: lsl x8, x1, #3		; CHECK-NEXT: lsl x8, x1, #3
; CHECK-NEXT: add x0, x8, x0, lsl #8		; CHECK-NEXT: add x0, x8, x0, lsl #8
; CHECK-NEXT: str x8, [x2]		; CHECK-NEXT: str x8, [x2]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%4 = shl i64 %0, 8		%4 = shl i64 %0, 8
%5 = shl i64 %1, 3		%5 = shl i64 %1, 3
store i64 %5, i64* %2		store i64 %5, i64* %2
%6 = add i64 %5, %4		%6 = add i64 %5, %4
ret i64 %6		ret i64 %6
}		}

define i32 @add_swap_rhs_lhs_i32(i32 %0, i32 %1) {		define i32 @add_swap_rhs_lhs_i32(i32 %0, i32 %1) {
		; i32 variant: the shift by 3 is expected to be folded into the add and the
		; shift by 8 emitted as a separate lsl.
; CHECK-LABEL: add_swap_rhs_lhs_i32:		; CHECK-LABEL: add_swap_rhs_lhs_i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: lsl w8, w1, #3		; CHECK-NEXT: lsl w8, w0, #8
; CHECK-NEXT: add w0, w8, w0, lsl #8		; CHECK-NEXT: add w0, w8, w1, lsl #3
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%3 = shl i32 %0, 8		%3 = shl i32 %0, 8
%4 = shl i32 %1, 3		%4 = shl i32 %1, 3
%5 = add i32 %4, %3		%5 = add i32 %4, %3
ret i32 %5		ret i32 %5
}		}

define i32 @add_swap_no_op_i32(i32 %0, i32 %1, i32* %2) {		define i32 @add_swap_no_op_i32(i32 %0, i32 %1, i32* %2) {
Show All 14 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Swap 'lsl(val1,small-shmt)' to right hand side for ADD(lsl(val1,small-shmt), lsl(val2,large-shmt))
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 466404

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/logical_shifted_reg.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Swap 'lsl(val1,small-shmt)' to right hand side for ADD(lsl(val1,small-shmt), lsl(val2,large-shmt))
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 466404

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/logical_shifted_reg.ll

[AArch64] Swap 'lsl(val1,small-shmt)' to right hand side for ADD(lsl(val1,small-shmt), lsl(val2,large-shmt))
ClosedPublic