Diff 287469

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,856 Lines • ▼ Show 20 Lines	SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
// undefined):		// undefined):
// < MSB1\|LSB1, MSB2\|LSB2, uu, uu, uu, uu, uu, uu> to		// < MSB1\|LSB1, MSB2\|LSB2, uu, uu, uu, uu, uu, uu> to
// < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>		// < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
//		//
// The same operation in little-endian ordering will be:		// The same operation in little-endian ordering will be:
// <uu, uu, uu, uu, uu, uu, LSB2\|MSB2, LSB1\|MSB1> to		// <uu, uu, uu, uu, uu, uu, LSB2\|MSB2, LSB1\|MSB1> to
// <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>		// <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>

assert(Op.getValueType().isVector() && "Vector type expected.");

SDLoc DL(Op);
SDValue N1 = Op.getOperand(0);
unsigned SrcSize = N1.getValueType().getSizeInBits();
assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);

EVT TrgVT = Op.getValueType();		EVT TrgVT = Op.getValueType();
		assert(TrgVT.isVector() && "Vector type expected.");
unsigned TrgNumElts = TrgVT.getVectorNumElements();		unsigned TrgNumElts = TrgVT.getVectorNumElements();
EVT EltVT = TrgVT.getVectorElementType();		EVT EltVT = TrgVT.getVectorElementType();
		if (!isOperationCustom(Op.getOpcode(), TrgVT) \|\|
		TrgVT.getSizeInBits() > 128 \|\| !isPowerOf2_32(TrgNumElts) \|\|
		!isPowerOf2_32(EltVT.getSizeInBits()))
		return SDValue();

		SDValue N1 = Op.getOperand(0);
		EVT SrcVT = N1.getValueType();
		unsigned SrcSize = SrcVT.getSizeInBits();
		if (SrcSize > 256 \|\|
		!isPowerOf2_32(SrcVT.getVectorNumElements()) \|\|
		!isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
		return SDValue();
		if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
		return SDValue();

unsigned WideNumElts = 128 / EltVT.getSizeInBits();		unsigned WideNumElts = 128 / EltVT.getSizeInBits();
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);		EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

		SDLoc DL(Op);
		SDValue Op1, Op2;
		if (SrcSize == 256) {
		EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
		nemanjaiUnsubmitted Done Reply Inline Actions I believe `DAG.getTargetLoweringInfo()` is just `*this`, so we should be able to just call `getVectorIdxTy()` unqualified. nemanjai: I believe `DAG.getTargetLoweringInfo()` is just `*this`, so we should be able to just call…
		EVT SplitVT =
		N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
		unsigned SplitNumElts = SplitVT.getVectorNumElements();
		nemanjaiUnsubmitted Done Reply Inline Actions Isn't `SplitVT` just `SrcVT.getHalfNumVectorElementsVT(*DAG.getContext())`? Seems that all of the definitions here can just be something like: EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout()); EVT SplitVT = N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext()); unsigned SplitNumElts = SplitVT.getVectorNumElements(); nemanjai: Isn't `SplitVT` just `SrcVT.getHalfNumVectorElementsVT(*DAG.getContext())`? Seems that all of…
		Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
		DAG.getConstant(0, DL, VecIdxTy));
		Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
		DAG.getConstant(SplitNumElts, DL, VecIdxTy));
		}
		nemanjaiUnsubmitted Not Done Reply Inline Actions I realize that we don't need to bitcast `Op1` here because it already gets bitcast below. But I think it is one of those things that a reader looking at this code thinks "Huh? How come only one is bitcast?" (until they get to the bottom of the function). So my minor nit here would be to just do both bitcasts at the same point below unconditionally. Bitcasting to the same type should not produce a new node. nemanjai: I realize that we don't need to bitcast `Op1` here because it already gets bitcast below. But…
		else {
		Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
		Op2 = DAG.getUNDEF(WideVT);
		}

// First list the elements we want to keep.		// First list the elements we want to keep.
unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();		unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
SmallVector<int, 16> ShuffV;		SmallVector<int, 16> ShuffV;
if (Subtarget.isLittleEndian())		if (Subtarget.isLittleEndian())
for (unsigned i = 0; i < TrgNumElts; ++i)		for (unsigned i = 0; i < TrgNumElts; ++i)
ShuffV.push_back(i * SizeMult);		ShuffV.push_back(i * SizeMult);
else		else
for (unsigned i = 1; i <= TrgNumElts; ++i)		for (unsigned i = 1; i <= TrgNumElts; ++i)
ShuffV.push_back(i * SizeMult - 1);		ShuffV.push_back(i * SizeMult - 1);

// Populate the remaining elements with undefs.		// Populate the remaining elements with undefs.
for (unsigned i = TrgNumElts; i < WideNumElts; ++i)		for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
// ShuffV.push_back(i + WideNumElts);		// ShuffV.push_back(i + WideNumElts);
ShuffV.push_back(WideNumElts + 1);		ShuffV.push_back(WideNumElts + 1);

SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);		Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);		Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
		return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
}		}

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when		/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.		/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {		SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// Not FP, or using SPE? Not a fsel.		// Not FP, or using SPE? Not a fsel.
if (!Op.getOperand(0).getValueType().isFloatingPoint() \|\|		if (!Op.getOperand(0).getValueType().isFloatingPoint() \|\|
!Op.getOperand(2).getValueType().isFloatingPoint() \|\| Subtarget.hasSPE())		!Op.getOperand(2).getValueType().isFloatingPoint() \|\| Subtarget.hasSPE())
▲ Show 20 Lines • Show All 2,840 Lines • ▼ Show 20 Lines	void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::FP_TO_SINT:		case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:		case ISD::FP_TO_UINT:
// LowerFP_TO_INT() can only handle f32 and f64.		// LowerFP_TO_INT() can only handle f32 and f64.
if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==		if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
MVT::ppcf128)		MVT::ppcf128)
return;		return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));		Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;		return;
case ISD::TRUNCATE: {		case ISD::TRUNCATE: {
EVT TrgVT = N->getValueType(0);		if (!N->getValueType(0).isVector())
		nemanjaiUnsubmitted Not Done Reply Inline Actions We check that the number of elements in the wide input is a power of 2 as well as that the target element type width is a power of 2. What happens with something weird like `trunc <8 x i24> to <8 x i16>` (also, please add such a test case)? nemanjai: We check that the number of elements in the wide input is a power of 2 as well as that the…
		RolandFAuthorUnsubmitted Done Reply Inline Actions Moved checks to lowering function. There was an existing check for source bit width, so 8x24 was skipped. Added some additional checking. Added an 8x24 test to the test case. RolandF: Moved checks to lowering function. There was an existing check for source bit width, so 8x24…
EVT OpVT = N->getOperand(0).getValueType();		return;
if (TrgVT.isVector() &&		SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
isOperationCustom(N->getOpcode(), TrgVT) &&		if (Lowered)
OpVT.getSizeInBits() <= 128 &&		Results.push_back(Lowered);
isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits()))
Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
return;		return;
}		}
case ISD::BITCAST:		case ISD::BITCAST:
// Don't handle bitcast here.		// Don't handle bitcast here.
return;		return;
case ISD::FP_EXTEND:		case ISD::FP_EXTEND:
SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);		SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
if (Lowered)		if (Lowered)
▲ Show 20 Lines • Show All 5,756 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/vec-trunc2.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
				; RUN: -mattr=+vsx -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s \| \
				; RUN: FileCheck %s
				; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
				; RUN: -mattr=+vsx -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s \| \
				; RUN: FileCheck %s --check-prefix=CHECK-BE

				define dso_local <8 x i8> @test8x32(i32 %i1, i32 %i2, i32 %i3, i32 %i4,
				i32 %i5, i32 %i6, i32 %i7, i32 %i8) {
				; CHECK-LABEL: test8x32:
				; CHECK: # %bb.0:
				; CHECK-NEXT: rldimi r3, r4, 32, 0
				; CHECK-NEXT: rldimi r5, r6, 32, 0
				; CHECK-NEXT: addis r11, r2, .LCPI0_0@toc@ha
				; CHECK-NEXT: rldimi r7, r8, 32, 0
				; CHECK-NEXT: rldimi r9, r10, 32, 0
				; CHECK-NEXT: mtfprd f0, r3
				; CHECK-NEXT: addi r3, r11, .LCPI0_0@toc@l
				qiucfUnsubmitted Done Reply Inline Actions Use `CHECK-COUNT`? qiucf: Use `CHECK-COUNT`?
				; CHECK-NEXT: mtfprd f1, r5
				; CHECK-NEXT: lvx v4, 0, r3
				; CHECK-NEXT: mtfprd f2, r7
				nemanjaiUnsubmitted Done Reply Inline Actions Please add a check that shows how we produced `v3`. Presumably this is a shift from `v2`? nemanjai: Please add a check that shows how we produced `v3`. Presumably this is a shift from `v2`?
				; CHECK-NEXT: mtfprd f3, r9
				; CHECK-NEXT: xxmrghd v2, vs1, vs0
				; CHECK-NEXT: xxmrghd v3, vs3, vs2
				; CHECK-NEXT: vperm v2, v3, v2, v4
				; CHECK-NEXT: blr
				;
				; CHECK-BE-LABEL: test8x32:
				; CHECK-BE: # %bb.0:
				; CHECK-BE-NEXT: stw r10, -80(r1)
				; CHECK-BE-NEXT: stw r9, -96(r1)
				; CHECK-BE-NEXT: stw r8, -112(r1)
				; CHECK-BE-NEXT: stw r7, -128(r1)
				; CHECK-BE-NEXT: stw r6, -16(r1)
				; CHECK-BE-NEXT: stw r5, -32(r1)
				; CHECK-BE-NEXT: stw r4, -48(r1)
				; CHECK-BE-NEXT: stw r3, -64(r1)
				; CHECK-BE-NEXT: addi r3, r1, -80
				; CHECK-BE-NEXT: lxvw4x v2, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -96
				; CHECK-BE-NEXT: lxvw4x v3, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -112
				; CHECK-BE-NEXT: lxvw4x v4, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -128
				; CHECK-BE-NEXT: lxvw4x v5, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -16
				; CHECK-BE-NEXT: lxvw4x v0, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -32
				; CHECK-BE-NEXT: lxvw4x v1, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -48
				; CHECK-BE-NEXT: lxvw4x v6, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -64
				; CHECK-BE-NEXT: lxvw4x v7, 0, r3
				; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
				; CHECK-BE-NEXT: vmrghw v2, v3, v2
				; CHECK-BE-NEXT: vmrghw v3, v5, v4
				; CHECK-BE-NEXT: vmrghw v4, v1, v0
				; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
				; CHECK-BE-NEXT: xxmrghd v2, v3, v2
				; CHECK-BE-NEXT: lxvw4x v8, 0, r3
				; CHECK-BE-NEXT: vmrghw v5, v7, v6
				; CHECK-BE-NEXT: xxmrghd v3, v5, v4
				; CHECK-BE-NEXT: vperm v2, v3, v2, v8
				; CHECK-BE-NEXT: blr
				%v10 = insertelement <8 x i32> undef, i32 %i1, i32 0
				nemanjaiUnsubmitted Not Done Reply Inline Actions It's bizarre that the script put the checks inside the signature of the function. nemanjai: It's bizarre that the script put the checks inside the signature of the function.
				%v11 = insertelement <8 x i32> %v10, i32 %i2, i32 1
				%v12 = insertelement <8 x i32> %v11, i32 %i3, i32 2
				%v13 = insertelement <8 x i32> %v12, i32 %i4, i32 3
				%v14 = insertelement <8 x i32> %v13, i32 %i5, i32 4
				%v15 = insertelement <8 x i32> %v14, i32 %i6, i32 5
				%v16 = insertelement <8 x i32> %v15, i32 %i7, i32 6
				%v17 = insertelement <8 x i32> %v16, i32 %i8, i32 7
				%v2 = trunc <8 x i32> %v17 to <8 x i8>
				ret <8 x i8> %v2
				}

				define dso_local <4 x i16> @test4x64(i64 %i1, i64 %i2, i64 %i3, i64 %i4) {
				; CHECK-LABEL: test4x64:
				; CHECK: # %bb.0:
				; CHECK-NEXT: addis r7, r2, .LCPI1_0@toc@ha
				; CHECK-NEXT: mtfprd f0, r5
				; CHECK-NEXT: mtfprd f1, r6
				; CHECK-NEXT: mtfprd f2, r3
				; CHECK-NEXT: addi r3, r7, .LCPI1_0@toc@l
				; CHECK-NEXT: mtfprd f3, r4
				; CHECK-NEXT: xxmrghd v2, vs1, vs0
				; CHECK-NEXT: lvx v4, 0, r3
				; CHECK-NEXT: xxmrghd v3, vs3, vs2
				; CHECK-NEXT: vperm v2, v2, v3, v4
				; CHECK-NEXT: blr
				;
				; CHECK-BE-LABEL: test4x64:
				; CHECK-BE: # %bb.0:
				; CHECK-BE-NEXT: std r6, -8(r1)
				; CHECK-BE-NEXT: std r5, -16(r1)
				; CHECK-BE-NEXT: std r4, -24(r1)
				; CHECK-BE-NEXT: std r3, -32(r1)
				; CHECK-BE-NEXT: addi r3, r1, -32
				; CHECK-BE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
				; CHECK-BE-NEXT: addi r7, r1, -16
				; CHECK-BE-NEXT: lxvd2x v3, 0, r3
				; CHECK-BE-NEXT: addi r3, r4, .LCPI1_0@toc@l
				; CHECK-BE-NEXT: lxvd2x v2, 0, r7
				; CHECK-BE-NEXT: lxvw4x v4, 0, r3
				; CHECK-BE-NEXT: vperm v2, v3, v2, v4
				; CHECK-BE-NEXT: blr
				%v10 = insertelement <4 x i64> undef, i64 %i1, i32 0
				%v11 = insertelement <4 x i64> %v10, i64 %i2, i32 1
				%v12 = insertelement <4 x i64> %v11, i64 %i3, i32 2
				%v13 = insertelement <4 x i64> %v12, i64 %i4, i32 3
				%v2 = trunc <4 x i64> %v13 to <4 x i16>
				ret <4 x i16> %v2
				}

				define dso_local <8 x i16> @test8x24(i32 %i1, i32 %i2, i32 %i3, i32 %i4,
				i32 %i5, i32 %i6, i32 %i7, i32 %i8) {
				; CHECK-LABEL: test8x24:
				; CHECK: # %bb.0:
				; CHECK-NEXT: mtvsrd v2, r3
				; CHECK-NEXT: mtvsrd v3, r4
				; CHECK-NEXT: mtvsrd v4, r5
				; CHECK-NEXT: mtvsrd v5, r6
				; CHECK-NEXT: mtvsrd v0, r7
				; CHECK-NEXT: mtvsrd v1, r8
				; CHECK-NEXT: vmrghh v2, v3, v2
				; CHECK-NEXT: mtvsrd v3, r9
				; CHECK-NEXT: vmrghh v4, v5, v4
				; CHECK-NEXT: mtvsrd v5, r10
				; CHECK-NEXT: vmrghh v0, v1, v0
				; CHECK-NEXT: vmrghh v3, v5, v3
				; CHECK-NEXT: vmrglw v2, v4, v2
				; CHECK-NEXT: vmrglw v3, v3, v0
				; CHECK-NEXT: xxmrgld v2, v3, v2
				; CHECK-NEXT: blr
				;
				; CHECK-BE-LABEL: test8x24:
				; CHECK-BE: # %bb.0:
				; CHECK-BE-NEXT: sth r10, -16(r1)
				; CHECK-BE-NEXT: sth r9, -32(r1)
				; CHECK-BE-NEXT: sth r8, -48(r1)
				; CHECK-BE-NEXT: sth r7, -64(r1)
				; CHECK-BE-NEXT: sth r6, -80(r1)
				; CHECK-BE-NEXT: sth r5, -96(r1)
				; CHECK-BE-NEXT: sth r4, -112(r1)
				; CHECK-BE-NEXT: sth r3, -128(r1)
				; CHECK-BE-NEXT: addi r3, r1, -16
				; CHECK-BE-NEXT: lxvw4x v2, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -32
				; CHECK-BE-NEXT: lxvw4x v3, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -48
				; CHECK-BE-NEXT: lxvw4x v4, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -64
				; CHECK-BE-NEXT: lxvw4x v5, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -80
				; CHECK-BE-NEXT: lxvw4x v0, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -96
				; CHECK-BE-NEXT: lxvw4x v1, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -112
				; CHECK-BE-NEXT: lxvw4x v6, 0, r3
				; CHECK-BE-NEXT: addi r3, r1, -128
				; CHECK-BE-NEXT: lxvw4x v7, 0, r3
				; CHECK-BE-NEXT: vmrghh v2, v3, v2
				; CHECK-BE-NEXT: vmrghh v3, v5, v4
				; CHECK-BE-NEXT: vmrghh v4, v1, v0
				; CHECK-BE-NEXT: vmrghw v2, v3, v2
				; CHECK-BE-NEXT: vmrghh v5, v7, v6
				; CHECK-BE-NEXT: vmrghw v3, v5, v4
				; CHECK-BE-NEXT: xxmrghd v2, v3, v2
				; CHECK-BE-NEXT: blr
				%i11 = trunc i32 %i1 to i24
				%i21 = trunc i32 %i2 to i24
				%i31 = trunc i32 %i3 to i24
				%i41 = trunc i32 %i4 to i24
				%i51 = trunc i32 %i5 to i24
				%i61 = trunc i32 %i6 to i24
				%i71 = trunc i32 %i7 to i24
				%i81 = trunc i32 %i8 to i24
				%v10 = insertelement <8 x i24> undef, i24 %i11, i32 0
				%v11 = insertelement <8 x i24> %v10, i24 %i21, i32 1
				%v12 = insertelement <8 x i24> %v11, i24 %i31, i32 2
				%v13 = insertelement <8 x i24> %v12, i24 %i41, i32 3
				%v14 = insertelement <8 x i24> %v13, i24 %i51, i32 4
				%v15 = insertelement <8 x i24> %v14, i24 %i61, i32 5
				%v16 = insertelement <8 x i24> %v15, i24 %i71, i32 6
				%v17 = insertelement <8 x i24> %v16, i24 %i81, i32 7
				%v2 = trunc <8 x i24> %v17 to <8 x i16>
				ret <8 x i16> %v2
				}

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC] Extend custom lower of vector truncate to handle wider input
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 287469

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/test/CodeGen/PowerPC/vec-trunc2.ll

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC] Extend custom lower of vector truncate to handle wider input — Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 287469

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/test/CodeGen/PowerPC/vec-trunc2.ll

[PowerPC] Extend custom lower of vector truncate to handle wider input
ClosedPublic