Diff 97525

lib/Target/PowerPC/PPCISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 678 Lines • ▼ Show 20 Lines	if (Subtarget.hasVSX()) {
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);		addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);		addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

if (Subtarget.hasP8Altivec()) {		if (Subtarget.hasP8Altivec()) {
setOperationAction(ISD::SHL, MVT::v2i64, Legal);		setOperationAction(ISD::SHL, MVT::v2i64, Legal);
setOperationAction(ISD::SRA, MVT::v2i64, Legal);		setOperationAction(ISD::SRA, MVT::v2i64, Legal);
setOperationAction(ISD::SRL, MVT::v2i64, Legal);		setOperationAction(ISD::SRL, MVT::v2i64, Legal);

		// 128 bit shifts can be accomplished via 2 instructions for SHL and
		nemanjai [Unsubmitted, Done]: s/2/3 (i.e. we need 3 instructions to do the shift). Same below.
		iteratee [Author, Unsubmitted, Not Done]: Thanks.
		// SRL, but not for SRA because of the instructions available:
		// VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
		// doing
		setOperationAction(ISD::SHL, MVT::v1i128, Expand);
		setOperationAction(ISD::SRL, MVT::v1i128, Expand);
		setOperationAction(ISD::SRA, MVT::v1i128, Expand);

setOperationAction(ISD::SETCC, MVT::v2i64, Legal);		setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
}		}
else {		else {
setOperationAction(ISD::SHL, MVT::v2i64, Expand);		setOperationAction(ISD::SHL, MVT::v2i64, Expand);
setOperationAction(ISD::SRA, MVT::v2i64, Expand);		setOperationAction(ISD::SRA, MVT::v2i64, Expand);
setOperationAction(ISD::SRL, MVT::v2i64, Expand);		setOperationAction(ISD::SRL, MVT::v2i64, Expand);

setOperationAction(ISD::SETCC, MVT::v2i64, Custom);		setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
Show All 37 Lines	if (Subtarget.hasAltivec()) {
if (Subtarget.hasP8Altivec()) {		if (Subtarget.hasP8Altivec()) {
addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);		addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);		addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
}		}

if (Subtarget.hasP9Vector()) {		if (Subtarget.hasP9Vector()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);		setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);		setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

		// 128 bit shifts can be accomplished via 2 instructions for SHL and
		// SRL, but not for SRA because of the instructions available:
		// VS{RL} and VS{RL}O.
		setOperationAction(ISD::SHL, MVT::v1i128, Legal);
		setOperationAction(ISD::SRL, MVT::v1i128, Legal);
		setOperationAction(ISD::SRA, MVT::v1i128, Expand);
}		}
}		}

if (Subtarget.hasQPX()) {		if (Subtarget.hasQPX()) {
setOperationAction(ISD::FADD, MVT::v4f64, Legal);		setOperationAction(ISD::FADD, MVT::v4f64, Legal);
setOperationAction(ISD::FSUB, MVT::v4f64, Legal);		setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
setOperationAction(ISD::FMUL, MVT::v4f64, Legal);		setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
setOperationAction(ISD::FREM, MVT::v4f64, Expand);		setOperationAction(ISD::FREM, MVT::v4f64, Expand);
▲ Show 20 Lines • Show All 12,188 Lines • Show Last 20 Lines

lib/Target/PowerPC/PPCInstrAltivec.td

	Show First 20 Lines • Show All 981 Lines • ▼ Show 20 Lines

	// Vector shifts			// Vector shifts
	def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)),			def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)),
	(v16i8 (VSLB $vA, $vB))>;			(v16i8 (VSLB $vA, $vB))>;
	def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),			def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
	(v8i16 (VSLH $vA, $vB))>;			(v8i16 (VSLH $vA, $vB))>;
	def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),			def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
	(v4i32 (VSLW $vA, $vB))>;			(v4i32 (VSLW $vA, $vB))>;
				def : Pat<(v1i128 (shl v1i128:$vA, v1i128:$vB)),
				(v1i128 (VSL (VSLO $vA, $vB), $vB))>;
				nemanjai [Unsubmitted, Done]: I think these patterns are missing something. I understand that the shift amount is split up between the `VSLO` and `VSL` (i.e. shifting by 9 is meant to shift `$vA` by one byte and one bit). However, for the `VSL`, every byte in `$vB` needs to have the same value in the low order 3 bits or the result is undefined. So I think we need to `VSPLTB 15` the `$vB`.
				iteratee [Author, Unsubmitted, Not Done]: The documentation says that for VSL it just uses bits 125:127 from VRB.
				nemanjai [Unsubmitted, Done]: From Power ISA 3.0: "The result is placed into VR[VRT], except if, for any byte element in register VR[VRB], the low-order 3 bits are not equal to the shift amount, then VR[VRT] is undefined." And there have apparently been cores in the past (I don't know which ones) that would not produce the correct results if the bottom 3 bits were not the same in all the byte elements. So I'm afraid we'll need this `VSPLTB`.
				iteratee [Author, Unsubmitted, Not Done]: OK. But we only want to generate this sequence on Power 9 anyway. Is this well defined on Power 9?

	def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),			def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
	(v16i8 (VSRB $vA, $vB))>;			(v16i8 (VSRB $vA, $vB))>;
	def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),			def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
	(v8i16 (VSRH $vA, $vB))>;			(v8i16 (VSRH $vA, $vB))>;
	def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),			def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
	(v4i32 (VSRW $vA, $vB))>;			(v4i32 (VSRW $vA, $vB))>;
				def : Pat<(v1i128 (srl v1i128:$vA, v1i128:$vB)),
				(v1i128 (VSR (VSRO $vA, $vB), $vB))>;

	def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),			def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
	(v16i8 (VSRAB $vA, $vB))>;			(v16i8 (VSRAB $vA, $vB))>;
	def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)),			def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)),
	(v8i16 (VSRAH $vA, $vB))>;			(v8i16 (VSRAH $vA, $vB))>;
	def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)),			def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)),
	(v4i32 (VSRAW $vA, $vB))>;			(v4i32 (VSRAW $vA, $vB))>;

	▲ Show 20 Lines • Show All 451 Lines • Show Last 20 Lines

test/CodeGen/PowerPC/shift128.ll

	; RUN: llc -verify-machineinstrs < %s -march=ppc64 \| grep sld \| count 5			; RUN: llc -verify-machineinstrs < %s \| FileCheck %s
				target datalayout = "e-m:e-i64:64-n32:64"
				target triple = "powerpc64le-grtev4-linux-gnu"

				kbarton [Unsubmitted, Done]: I don't recognize this triple. We typically use powerpc64le-unknown-linux-gnu.
				echristo [Unsubmitted, Done]: Yeah, the vendor part is a nop here, but should be changed.
	define i128 @foo_lshr(i128 %x, i128 %y) {			; CHECK-LABEL: lshr:
				; CHECK-DAG: subfic [[R0:[0-9]+]], 5, 64
				; CHECK-DAG: addi [[R1:[0-9]+]], 5, -64
				; CHECK-DAG: srd [[R2:[0-9]+]], 3, 5
				; CHECK-DAG: sld [[R3:[0-9]+]], 4, [[R0]]
				; CHECK-DAG: srd [[R4:[0-9]+]], 4, [[R1]]
				; CHECK-DAG: or [[R5:[0-9]+]], [[R2]], [[R3]]
				; CHECK-DAG: or 3, [[R5]], [[R4]]
				; CHECK-DAG: srd 4, 4, 5
				; CHECK: blr
				define i128 @lshr(i128 %x, i128 %y) {
	%r = lshr i128 %x, %y			%r = lshr i128 %x, %y
	ret i128 %r			ret i128 %r
	}			}
	define i128 @foo_ashr(i128 %x, i128 %y) {			; CHECK-LABEL: ashr:
				; CHECK-DAG: subfic [[R0:[0-9]+]], 5, 64
				; CHECK-DAG: addi [[R1:[0-9]+]], 5, -64
				; CHECK-DAG: srd [[R2:[0-9]+]], 3, 5
				; CHECK-DAG: sld [[R3:[0-9]+]], 4, [[R0]]
				; CHECK-DAG: srad [[R4:[0-9]+]], 4, [[R1]]
				; CHECK-DAG: or [[R5:[0-9]+]], [[R2]], [[R3]]
				; CHECK-DAG: cmpwi [[R1]], 1
				; CHECK-DAG: srad 4, 4, 5
				; CHECK: isel 3, [[R5]], [[R4]], 0
				; CHECK: blr
				define i128 @ashr(i128 %x, i128 %y) {
	%r = ashr i128 %x, %y			%r = ashr i128 %x, %y
	ret i128 %r			ret i128 %r
	}			}
	define i128 @foo_shl(i128 %x, i128 %y) {			; CHECK-LABEL: shl:
				; CHECK-DAG: subfic [[R0:[0-9]+]], 5, 64
				; CHECK-DAG: addi [[R1:[0-9]+]], 5, -64
				; CHECK-DAG: sld [[R2:[0-9]+]], 4, 5
				; CHECK-DAG: srd [[R3:[0-9]+]], 3, [[R0]]
				; CHECK-DAG: sld [[R4:[0-9]+]], 3, [[R1]]
				; CHECK-DAG: or [[R5:[0-9]+]], [[R2]], [[R3]]
				; CHECK-DAG: or 4, [[R5]], [[R4]]
				; CHECK-DAG: sld 3, 3, 5
				; CHECK: blr
				define i128 @shl(i128 %x, i128 %y) {
	%r = shl i128 %x, %y			%r = shl i128 %x, %y
	ret i128 %r			ret i128 %r
	}			}

				; CHECK-LABEL: shl_v1i128:
				; CHECK-NOT: vslo
				; CHECK-NOT: vsl
				; CHECK: blr
				define i128 @shl_v1i128(i128 %arg, i128 %amt) local_unnamed_addr #0 {
				entry:
				%0 = insertelement <1 x i128> undef, i128 %arg, i32 0
				%1 = insertelement <1 x i128> undef, i128 %amt, i32 0
				%2 = shl <1 x i128> %0, %1
				%retval = extractelement <1 x i128> %2, i32 0
				ret i128 %retval
				}

				; CHECK-LABEL: lshr_v1i128:
				; CHECK-NOT: {{\b}}vsro
				; CHECK-NOT: {{\b}}vsr
				; CHECK: blr
				define i128 @lshr_v1i128(i128 %arg, i128 %amt) local_unnamed_addr #0 {
				entry:
				%0 = insertelement <1 x i128> undef, i128 %arg, i32 0
				%1 = insertelement <1 x i128> undef, i128 %amt, i32 0
				%2 = lshr <1 x i128> %0, %1
				%retval = extractelement <1 x i128> %2, i32 0
				ret i128 %retval
				}

				; Arithmetic shift right is not available as an operation on the vector registers.
				; CHECK-LABEL: ashr_v1i128:
				; CHECK-NOT: {{\b}}vsro
				; CHECK-NOT: {{\b}}vsr
				; CHECK: blr
				define i128 @ashr_v1i128(i128 %arg, i128 %amt) local_unnamed_addr #0 {
				entry:
				%0 = insertelement <1 x i128> undef, i128 %arg, i32 0
				%1 = insertelement <1 x i128> undef, i128 %amt, i32 0
				%2 = ashr <1 x i128> %0, %1
				%retval = extractelement <1 x i128> %2, i32 0
				ret i128 %retval
				}

This is an archive of the discontinued LLVM Phabricator instance.

CodeGen: Power: Add lowering for shifts of v1i128.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 97525

lib/Target/PowerPC/PPCISelLowering.cpp

lib/Target/PowerPC/PPCInstrAltivec.td

test/CodeGen/PowerPC/shift128.ll

This is an archive of the discontinued LLVM Phabricator instance.

CodeGen: Power: Add lowering for shifts of v1i128. [Closed, Public]

Details

Diff Detail

Event Timeline

Revision Contents

Diff 97525

lib/Target/PowerPC/PPCISelLowering.cpp

lib/Target/PowerPC/PPCInstrAltivec.td

test/CodeGen/PowerPC/shift128.ll

CodeGen: Power: Add lowering for shifts of v1i128.
ClosedPublic