Diff 298295

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 12,330 Lines • ▼ Show 20 Lines	Cmp = getAArch64Cmp(
ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,		ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
dl);		dl);

EVT VT = Op->getValueType(0);		EVT VT = Op->getValueType(0);
LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));		LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);		return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
}		}

		// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
		//
		// Matches
		//   (add (extract_vector_elt (UADDV a), 0),
		//        (extract_vector_elt (UADDV b), 0))
		// and rewrites it to
		//   (extract_vector_elt (UADDV (add a, b)), 0)
		// so that one vector add feeds a single UADDV instead of two UADDV nodes
		// feeding a scalar add.
		//
		// \param N   the node being combined; anything but ISD::ADD is rejected.
		// \param DAG the SelectionDAG used to build the replacement nodes.
		// \returns the replacement value, or an empty SDValue if N does not match.
		static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
		  EVT VT = N->getValueType(0);
		  // Only a scalar integer ISD::ADD is handled here.
		  if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
		    return SDValue();

		  // Both addends must be lane extracts producing the type of the add.
		  SDValue LHS = N->getOperand(0);
		  SDValue RHS = N->getOperand(1);
		  if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
		      RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
		    return SDValue();

		  // Both extracts must use the very same constant index node (this is a
		  // pointer comparison, not a value comparison), and that index must be 0.
		  auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
		  auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
		  if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
		    return SDValue();

		  // The extracted vectors must both be UADDV results of one common vector
		  // type whose element type is the scalar type of the add.
		  SDValue Op1 = LHS->getOperand(0);
		  SDValue Op2 = RHS->getOperand(0);
		  EVT OpVT1 = Op1.getValueType();
		  EVT OpVT2 = Op2.getValueType();
		  if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
		      Op2.getOpcode() != AArch64ISD::UADDV ||
		      OpVT1.getVectorElementType() != VT)
		    return SDValue();

		  // Add the two UADDV inputs as vectors, then emit one UADDV and extract
		  // lane 0 (the new index constant is always built as i64).
		  // NOTE(review): ValVT is taken from Val1 only and Val2's type is not
		  // checked separately; presumably a UADDV operand has the same type as
		  // its result, which was compared above -- confirm.
		  SDValue Val1 = Op1.getOperand(0);
		  SDValue Val2 = Op2.getOperand(0);
		  EVT ValVT = Val1->getValueType(0);
		  SDLoc DL(N);
		  SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
		  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
		                     DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
		                     DAG.getConstant(0, DL, MVT::i64));
		}

// The basic add/sub long vector instructions have variants with "2" on the end		// The basic add/sub long vector instructions have variants with "2" on the end
// which act on the high-half of their inputs. They are normally matched by		// which act on the high-half of their inputs. They are normally matched by
// patterns like:		// patterns like:
//		//
// (add (zeroext (extract_high LHS)),		// (add (zeroext (extract_high LHS)),
// (zeroext (extract_high RHS)))		// (zeroext (extract_high RHS)))
// -> uaddl2 vD, vN, vM		// -> uaddl2 vD, vN, vM
//		//
Show All 37 Lines	if (!LHS.getNode())
return SDValue();		return SDValue();

LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);		LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
}		}

return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);		return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
}		}

		// Combine hook for add/sub nodes: first attempts the UADDV sum fold, and
		// when that does not apply defers to performAddSubLongCombine.
		static SDValue performAddSubCombine(SDNode *N,
		                                    TargetLowering::DAGCombinerInfo &DCI,
		                                    SelectionDAG &DAG) {
		  // A non-empty result means the sum of two reductions was rewritten.
		  SDValue ReducedSum = performUADDVCombine(N, DAG);
		  if (ReducedSum)
		    return ReducedSum;

		  // Otherwise fall through to the existing add/sub long combine.
		  return performAddSubLongCombine(N, DCI, DAG);
		}

// Massage DAGs which we can use the high-half "long" operations on into		// Massage DAGs which we can use the high-half "long" operations on into
// something isel will recognize better. E.g.		// something isel will recognize better. E.g.
//		//
// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->		// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
// (aarch64_neon_umull (extract_high (v2i64 vec)))		// (aarch64_neon_umull (extract_high (v2i64 vec)))
// (extract_high (v2i64 (dup128 scalar)))))		// (extract_high (v2i64 (dup128 scalar)))))
//		//
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,		static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
▲ Show 20 Lines • Show All 2,334 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {		switch (N->getOpcode()) {
default:		default:
LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");		LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
break;		break;
case ISD::ABS:		case ISD::ABS:
return performABSCombine(N, DAG, DCI, Subtarget);		return performABSCombine(N, DAG, DCI, Subtarget);
case ISD::ADD:		case ISD::ADD:
case ISD::SUB:		case ISD::SUB:
return performAddSubLongCombine(N, DCI, DAG);		return performAddSubCombine(N, DCI, DAG);
case ISD::XOR:		case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);		return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:		case ISD::MUL:
return performMulCombine(N, DAG, DCI, Subtarget);		return performMulCombine(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP:		case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:		case ISD::UINT_TO_FP:
return performIntToFpCombine(N, DAG, Subtarget);		return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:		case ISD::FP_TO_SINT:
▲ Show 20 Lines • Show All 1,530 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/aarch64-addv.ll

Show First 20 Lines • Show All 132 Lines • ▼ Show 20 Lines	entry:
%rdx.2 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2)		%rdx.2 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2)
%r = add i16 %rdx.1, %rdx.2		%r = add i16 %rdx.1, %rdx.2
ret i16 %r		ret i16 %r
}		}

define i32 @addv_combine_i32(<4 x i32> %a1, <4 x i32> %a2) {		define i32 @addv_combine_i32(<4 x i32> %a1, <4 x i32> %a2) {
; CHECK-LABEL: addv_combine_i32:		; CHECK-LABEL: addv_combine_i32:
; CHECK: // %bb.0: // %entry		; CHECK: // %bb.0: // %entry
		; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: addv s0, v0.4s		; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: addv s1, v1.4s		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: add w0, w8, w9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
entry:		entry:
%rdx.1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a1)		%rdx.1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a1)
%rdx.2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2)		%rdx.2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2)
%r = add i32 %rdx.1, %rdx.2		%r = add i32 %rdx.1, %rdx.2
ret i32 %r		ret i32 %r
}		}

define i64 @addv_combine_i64(<2 x i64> %a1, <2 x i64> %a2) {		define i64 @addv_combine_i64(<2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: addv_combine_i64:		; CHECK-LABEL: addv_combine_i64:
; CHECK: // %bb.0: // %entry		; CHECK: // %bb.0: // %entry
		; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: addp d0, v0.2d		; CHECK-NEXT: addp d0, v0.2d
; CHECK-NEXT: addp d1, v1.2d		; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: fmov x9, d1
; CHECK-NEXT: add x0, x8, x9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
entry:		entry:
%rdx.1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1)		%rdx.1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1)
%rdx.2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a2)		%rdx.2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a2)
%r = add i64 %rdx.1, %rdx.2		%r = add i64 %rdx.1, %rdx.2
ret i64 %r		ret i64 %r
}		}

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Combine UADDVs to generate vector add
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 298295

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/aarch64-addv.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Combine UADDVs to generate vector add
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 298295

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/aarch64-addv.ll

[AArch64] Combine UADDVs to generate vector add
ClosedPublic