Diff 219867

llvm/include/llvm/Target/TargetSelectionDAG.td

	Show First 20 Lines • Show All 414 Lines • ▼ Show 20 Lines
	def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>;			def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>;
	def trunc : SDNode<"ISD::TRUNCATE" , SDTIntTruncOp>;			def trunc : SDNode<"ISD::TRUNCATE" , SDTIntTruncOp>;
	def bitconvert : SDNode<"ISD::BITCAST" , SDTUnaryOp>;			def bitconvert : SDNode<"ISD::BITCAST" , SDTUnaryOp>;
	def addrspacecast : SDNode<"ISD::ADDRSPACECAST", SDTUnaryOp>;			def addrspacecast : SDNode<"ISD::ADDRSPACECAST", SDTUnaryOp>;
	def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>;			def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>;
	def insertelt : SDNode<"ISD::INSERT_VECTOR_ELT", SDTVecInsert>;			def insertelt : SDNode<"ISD::INSERT_VECTOR_ELT", SDTVecInsert>;

	def vecreduce_add : SDNode<"ISD::VECREDUCE_ADD", SDTVecReduce>;			def vecreduce_add : SDNode<"ISD::VECREDUCE_ADD", SDTVecReduce>;
				def vecreduce_smax : SDNode<"ISD::VECREDUCE_SMAX", SDTVecReduce>;
				def vecreduce_umax : SDNode<"ISD::VECREDUCE_UMAX", SDTVecReduce>;
				def vecreduce_smin : SDNode<"ISD::VECREDUCE_SMIN", SDTVecReduce>;
				def vecreduce_umin : SDNode<"ISD::VECREDUCE_UMIN", SDTVecReduce>;

	def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>;			def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>;
	def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>;			def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>;
	def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>;			def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>;
	def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>;			def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>;
	def frem : SDNode<"ISD::FREM" , SDTFPBinOp>;			def frem : SDNode<"ISD::FREM" , SDTFPBinOp>;
	def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp>;			def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp>;
	def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp>;			def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp>;
	▲ Show 20 Lines • Show All 1,131 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 263 Lines • ▼ Show 20 Lines	for (auto VT : IntTypes) {
setOperationAction(ISD::UDIV, VT, Expand);		setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);		setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);		setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);		setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);		setOperationAction(ISD::CTPOP, VT, Expand);

// Vector reductions		// Vector reductions
setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);		setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
		setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
		setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
		setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
		setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);

if (!HasMVEFP) {		if (!HasMVEFP) {
setOperationAction(ISD::SINT_TO_FP, VT, Expand);		setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);		setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);		setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);		setOperationAction(ISD::FP_TO_UINT, VT, Expand);
}		}

▲ Show 20 Lines • Show All 16,510 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMInstrMVE.td

Show First 20 Lines • Show All 663 Lines • ▼ Show 20 Lines	multiclass MVE_VMINMAXV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
def u8 : MVE_VMINMAXV<iname, "u8", 0b1, 0b00, 0b1, bit_7>;		def u8 : MVE_VMINMAXV<iname, "u8", 0b1, 0b00, 0b1, bit_7>;
def u16 : MVE_VMINMAXV<iname, "u16", 0b1, 0b01, 0b1, bit_7>;		def u16 : MVE_VMINMAXV<iname, "u16", 0b1, 0b01, 0b1, bit_7>;
def u32 : MVE_VMINMAXV<iname, "u32", 0b1, 0b10, 0b1, bit_7>;		def u32 : MVE_VMINMAXV<iname, "u32", 0b1, 0b10, 0b1, bit_7>;
}		}

defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 0b1>;		defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 0b1>;
defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0b0>;		defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0b0>;

		let Predicates = [HasMVEInt] in {
		def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),
		[Review comment] samparker (Done): Guess these need to be changed to MQPR.
		(i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), $src))>;
		def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))),
		(i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>;
		def : Pat<(i32 (vecreduce_smax (v4i32 MQPR:$src))),
		(i32 (MVE_VMAXVs32 (t2MOVi (i32 -2147483648)), $src))>;
		def : Pat<(i32 (vecreduce_umax (v16i8 MQPR:$src))),
		(i32 (MVE_VMAXVu8 (t2MOVi (i32 0)), $src))>;
		def : Pat<(i32 (vecreduce_umax (v8i16 MQPR:$src))),
		(i32 (MVE_VMAXVu16 (t2MOVi (i32 0)), $src))>;
		def : Pat<(i32 (vecreduce_umax (v4i32 MQPR:$src))),
		(i32 (MVE_VMAXVu32 (t2MOVi (i32 0)), $src))>;

		def : Pat<(i32 (vecreduce_smin (v16i8 MQPR:$src))),
		(i32 (MVE_VMINVs8 (t2MOVi (i32 127)), $src))>;
		def : Pat<(i32 (vecreduce_smin (v8i16 MQPR:$src))),
		(i32 (MVE_VMINVs16 (t2MOVi16 (i32 32767)), $src))>;
		def : Pat<(i32 (vecreduce_smin (v4i32 MQPR:$src))),
		(i32 (MVE_VMINVs32 (t2MVNi (i32 -2147483648)), $src))>;
		def : Pat<(i32 (vecreduce_umin (v16i8 MQPR:$src))),
		(i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>;
		def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))),
		(i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>;
		def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))),
		(i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>;

		}

multiclass MVE_VMINMAXAV_ty<string iname, bit bit_7, list<dag> pattern=[]> {		multiclass MVE_VMINMAXAV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
def s8 : MVE_VMINMAXV<iname, "s8", 0b0, 0b00, 0b0, bit_7>;		def s8 : MVE_VMINMAXV<iname, "s8", 0b0, 0b00, 0b0, bit_7>;
def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b0, bit_7>;		def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b0, bit_7>;
def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b0, bit_7>;		def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b0, bit_7>;
}		}

defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>;		defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>;
defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>;		defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>;
▲ Show 20 Lines • Show All 4,490 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Show First 20 Lines • Show All 1,053 Lines • ▼ Show 20 Lines	bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,

switch (Opcode) {		switch (Opcode) {
case Instruction::FAdd:		case Instruction::FAdd:
case Instruction::FMul:		case Instruction::FMul:
case Instruction::And:		case Instruction::And:
case Instruction::Or:		case Instruction::Or:
case Instruction::Xor:		case Instruction::Xor:
case Instruction::Mul:		case Instruction::Mul:
case Instruction::ICmp:
case Instruction::FCmp:		case Instruction::FCmp:
return false;		return false;
		case Instruction::ICmp:
		[Review comment] dmgreen (Done): Is there a VMINV patch too? Am I correct that this would enable both min and max reduction intrinsics?
		samtebbsAuthorUnsubmitted Done Reply Inline Actions It would enable both. There isn't a separate patch for vminv so I could add it to this one if you'd like. samtebbs: It would enable both. There isn't a separate patch for vminv so I could add it to this one if…
		samparkerUnsubmitted Not Done Reply Inline Actions Is this actually needed for codegen? samparker: Is this actually needed for codegen?
		samtebbsAuthorUnsubmitted Done Reply Inline Actions It is, as otherwise the smax and umax vector reductions aren't generated and can't be selected on. samtebbs: It is, as otherwise the smax and umax vector reductions aren't generated and can't be selected…
		samparkerUnsubmitted Not Done Reply Inline Actions As in, the reductions get expanded somewhere and your tests would fail? samparker: As in, the reductions get expanded somewhere and your tests would fail?
		[Review comment] samtebbs (author, Done): Indeed, the tests fail and we get markedly worse codegen:
		```
		65c65,70
		< vmaxv.u32 r2, q0
		---
		> vmov.f32 s5, s3
		> vmax.u32 q0, q0, q1
		> vmov.32 r2, q0[1]
		> vdup.32 q1, r2
		> vmax.u32 q0, q0, q1
		> vmov r2, s0
		```
		samparkerUnsubmitted Not Done Reply Inline Actions Ok, cheers. Then I expect Dave is right that the vmin could now get generated and will then assert because it can't be selected...? samparker: Ok, cheers. Then I expect Dave is right that the vmin could now get generated and will then…
		[Review comment] dmgreen (Not Done): Yeah, I think there are two different places where these intrinsics can be expanded: once in a pre-isel pass if this returns false, or in ISel if the operations are marked Expand. So I think the selection would be OK, but it would be best to add vminv to make sure the vectoriser doesn't start generating them only for them to be messily expanded.
case Instruction::Add:		case Instruction::Add:
return ScalarBits * Ty->getVectorNumElements() == 128;		return ScalarBits < 64 && ScalarBits * Ty->getVectorNumElements() == 128;
default:		default:
llvm_unreachable("Unhandled reduction opcode");		llvm_unreachable("Unhandled reduction opcode");
}		}
return false;		return false;
}		}

llvm/test/CodeGen/Thumb2/mve-vmaxv.ll

This file was added.

				; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

				declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
				declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
				declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
				declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
				declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
				declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
				declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
				declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
				declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
				declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
				declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
				declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)

				[Review comment] dmgreen (Done): The old code for these was quite large, right? Much larger than needing to materialise the constant?
				samtebbsAuthorUnsubmitted Done Reply Inline Actions Yep, in this case it was 78 instructions long! And the others varied from 15 to 61. samtebbs: Yep, in this case it was 78 instructions long! And the others varied from 15 to 61.
				; Signed-max reduction of <16 x i8>: expects r0 to be seeded with
				; `mvn r0, #127` and the whole reduction to be a single vmaxv.s8.
				define arm_aapcs_vfpcc i8 @vmaxv_s_v16i8_i32(<16 x i8> %s1) {
				; CHECK-LABEL: vmaxv_s_v16i8_i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: mvn r0, #127
				; CHECK-NEXT: vmaxv.s8 r0, q0
				; CHECK-NEXT: bx lr
				%r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %s1)
				ret i8 %r
				}

				; Signed-max reduction of <8 x i16>: expects r0 to be seeded by a
				; movw/movt pair (#32768 / #65535) followed by a single vmaxv.s16.
				define arm_aapcs_vfpcc i16 @vmaxv_s_v8i16_i32(<8 x i16> %s1) {
				; CHECK-LABEL: vmaxv_s_v8i16_i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: movw r0, #32768
				; CHECK-NEXT: movt r0, #65535
				; CHECK-NEXT: vmaxv.s16 r0, q0
				; CHECK-NEXT: bx lr
				%r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1)
				ret i16 %r
				}

				; Signed-max reduction of <4 x i32>: expects r0 to be seeded with
				; `mov.w r0, #-2147483648` followed by a single vmaxv.s32.
				define arm_aapcs_vfpcc i32 @vmaxv_s_v4i32_i32(<4 x i32> %s1) {
				; CHECK-LABEL: vmaxv_s_v4i32_i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: mov.w r0, #-2147483648
				; CHECK-NEXT: vmaxv.s32 r0, q0
				; CHECK-NEXT: bx lr
				%r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %s1)
				ret i32 %r
				}

				; Unsigned-max reduction of <16 x i8>: expects r0 to be zeroed with
				; `movs r0, #0` followed by a single vmaxv.u8.
				define arm_aapcs_vfpcc i8 @vmaxv_u_v16i8_i32(<16 x i8> %s1) {
				; CHECK-LABEL: vmaxv_u_v16i8_i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: movs r0, #0
				; CHECK-NEXT: vmaxv.u8 r0, q0
				; CHECK-NEXT: bx lr
				%r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1)
				ret i8 %r
				}

				; Unsigned-max reduction of <8 x i16>: expects r0 to be zeroed with
				; `movs r0, #0` followed by a single vmaxv.u16.
				define arm_aapcs_vfpcc i16 @vmaxv_u_v8i16_i32(<8 x i16> %s1) {
				; CHECK-LABEL: vmaxv_u_v8i16_i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: movs r0, #0
				; CHECK-NEXT: vmaxv.u16 r0, q0
				; CHECK-NEXT: bx lr
				%r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1)
				ret i16 %r
				}

				; Unsigned-max reduction of <4 x i32>: expects r0 to be zeroed with
				; `movs r0, #0` followed by a single vmaxv.u32.
				define arm_aapcs_vfpcc i32 @vmaxv_u_v4i32_i32(<4 x i32> %s1) {
				; CHECK-LABEL: vmaxv_u_v4i32_i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: movs r0, #0
				; CHECK-NEXT: vmaxv.u32 r0, q0
				; CHECK-NEXT: bx lr
				%r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %s1)
				ret i32 %r
				}

				; Signed-min reduction of <16 x i8>: expects r0 to be seeded with
				; `movs r0, #127` followed by a single vminv.s8.
				define arm_aapcs_vfpcc i8 @vminv_s_v16i8_i32(<16 x i8> %s1) {
				; CHECK-LABEL: vminv_s_v16i8_i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: movs r0, #127
				; CHECK-NEXT: vminv.s8 r0, q0
				; CHECK-NEXT: bx lr
				%r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1)
				ret i8 %r
				}

				; Signed-min reduction of <8 x i16>: expects r0 to be seeded with
				; `movw r0, #32767` followed by a single vminv.s16.
				define arm_aapcs_vfpcc i16 @vminv_s_v8i16_i32(<8 x i16> %s1) {
				; CHECK-LABEL: vminv_s_v8i16_i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: movw r0, #32767
				; CHECK-NEXT: vminv.s16 r0, q0
				; CHECK-NEXT: bx lr
				%r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1)
				ret i16 %r
				}

				; Signed-min reduction of <4 x i32>: expects r0 to be seeded with
				; `mvn r0, #-2147483648` followed by a single vminv.s32.
				define arm_aapcs_vfpcc i32 @vminv_s_v4i32_i32(<4 x i32> %s1) {
				; CHECK-LABEL: vminv_s_v4i32_i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: mvn r0, #-2147483648
				; CHECK-NEXT: vminv.s32 r0, q0
				; CHECK-NEXT: bx lr
				%r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %s1)
				ret i32 %r
				}

				; Unsigned-min reduction of <16 x i8>: expects r0 to be seeded with
				; `movs r0, #255` followed by a single vminv.u8.
				define arm_aapcs_vfpcc i8 @vminv_u_v16i8_i32(<16 x i8> %s1) {
				; CHECK-LABEL: vminv_u_v16i8_i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: movs r0, #255
				; CHECK-NEXT: vminv.u8 r0, q0
				; CHECK-NEXT: bx lr
				%r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1)
				ret i8 %r
				}

				; Unsigned-min reduction of <8 x i16>: expects r0 to be seeded with
				; `movw r0, #65535` followed by a single vminv.u16.
				define arm_aapcs_vfpcc i16 @vminv_u_v8i16_i32(<8 x i16> %s1) {
				; CHECK-LABEL: vminv_u_v8i16_i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: movw r0, #65535
				; CHECK-NEXT: vminv.u16 r0, q0
				; CHECK-NEXT: bx lr
				%r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1)
				ret i16 %r
				}

				; Unsigned-min reduction of <4 x i32>: expects r0 to be seeded with
				; `mov.w r0, #-1` followed by a single vminv.u32.
				define arm_aapcs_vfpcc i32 @vminv_u_v4i32_i32(<4 x i32> %s1) {
				; CHECK-LABEL: vminv_u_v4i32_i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: mov.w r0, #-1
				; CHECK-NEXT: vminv.u32 r0, q0
				; CHECK-NEXT: bx lr
				%r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %s1)
				ret i32 %r
				}

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Add support for MVE vmaxv and vminv
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 219867

llvm/include/llvm/Target/TargetSelectionDAG.td

llvm/lib/Target/ARM/ARMISelLowering.cpp

llvm/lib/Target/ARM/ARMInstrMVE.td

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

llvm/test/CodeGen/Thumb2/mve-vmaxv.ll

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Add support for MVE vmaxv and vminv (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 219867

llvm/include/llvm/Target/TargetSelectionDAG.td

llvm/lib/Target/ARM/ARMISelLowering.cpp

llvm/lib/Target/ARM/ARMInstrMVE.td

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

llvm/test/CodeGen/Thumb2/mve-vmaxv.ll

[ARM] Add support for MVE vmaxv and vminv
ClosedPublic