Diff 222174

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,146 Lines • ▼ Show 20 Lines	SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
// <uu, uu, uu, uu, uu, uu, LSB2\|MSB2, LSB1\|MSB1> to		// <uu, uu, uu, uu, uu, uu, LSB2\|MSB2, LSB1\|MSB1> to
// <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>		// <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>

assert(Op.getValueType().isVector() && "Vector type expected.");		assert(Op.getValueType().isVector() && "Vector type expected.");

SDLoc DL(Op);		SDLoc DL(Op);
SDValue N1 = Op.getOperand(0);		SDValue N1 = Op.getOperand(0);
unsigned SrcSize = N1.getValueType().getSizeInBits();		unsigned SrcSize = N1.getValueType().getSizeInBits();
assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");		assert(SrcSize <= 256 && "Source must fit in two Altivec/VSX vectors");
SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);

EVT TrgVT = Op.getValueType();		EVT TrgVT = Op.getValueType();
unsigned TrgNumElts = TrgVT.getVectorNumElements();		unsigned TrgNumElts = TrgVT.getVectorNumElements();
EVT EltVT = TrgVT.getVectorElementType();		EVT EltVT = TrgVT.getVectorElementType();
unsigned WideNumElts = 128 / EltVT.getSizeInBits();		unsigned WideNumElts = 128 / EltVT.getSizeInBits();
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);		EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

		SDValue Op1, Op2;
		if (SrcSize == 256) {
		EVT VecIdxTy =
		DAG.getTargetLoweringInfo().getVectorIdxTy(DAG.getDataLayout());
		nemanjaiUnsubmitted Done Reply Inline Actions I believe `DAG.getTargetLoweringInfo()` is just `*this` so we should be able to just call `getVectorIdxTy()` unqualified. nemanjai: I believe `DAG.getTargetLoweringInfo()` is just `*this` so we should be able to just call…
		EVT SrcEltVT = N1.getValueType().getVectorElementType();
		unsigned SplitNumElts = 128 / SrcEltVT.getSizeInBits();
		EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, SplitNumElts);
		nemanjaiUnsubmitted Done Reply Inline Actions Isn't `SplitVT` just `SrcVT.getHalfNumVectorElementsVT(*DAG.getContext())`? Seems that all of the definitions here can just be something like: EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout()); EVT SplitVT = N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext()); unsigned SplitNumElts = SplitVT.getVectorNumElements(); nemanjai: Isn't `SplitVT` just `SrcVT.getHalfNumVectorElementsVT(*DAG.getContext())`? Seems that all of…
		Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
		DAG.getConstant(0, DL, VecIdxTy));
		Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
		DAG.getConstant(SplitNumElts, DL, VecIdxTy));
		Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
		nemanjaiUnsubmitted Not Done Reply Inline Actions I realize that we don't need to bitcast `Op1` here because it already gets bitcasted below. But I think it is one of those things that a reader looking at this code thinks "Huh? How come only one is bitcasted?" (until they get to the bottom of the function). So my minor nit here would be to just do both bitcasts at the same point below unconditionally. Bitcasting to the same type should not produce a new node. nemanjai: I realize that we don't need to bitcast `Op1` here because it already gets bitcasted below. But…
		}
		else {
		Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
		Op2 = DAG.getUNDEF(WideVT);
		}

// First list the elements we want to keep.		// First list the elements we want to keep.
unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();		unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
SmallVector<int, 16> ShuffV;		SmallVector<int, 16> ShuffV;
if (Subtarget.isLittleEndian())		if (Subtarget.isLittleEndian())
for (unsigned i = 0; i < TrgNumElts; ++i)		for (unsigned i = 0; i < TrgNumElts; ++i)
ShuffV.push_back(i * SizeMult);		ShuffV.push_back(i * SizeMult);
else		else
for (unsigned i = 1; i <= TrgNumElts; ++i)		for (unsigned i = 1; i <= TrgNumElts; ++i)
ShuffV.push_back(i * SizeMult - 1);		ShuffV.push_back(i * SizeMult - 1);

// Populate the remaining elements with undefs.		// Populate the remaining elements with undefs.
for (unsigned i = TrgNumElts; i < WideNumElts; ++i)		for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
// ShuffV.push_back(i + WideNumElts);		// ShuffV.push_back(i + WideNumElts);
ShuffV.push_back(WideNumElts + 1);		ShuffV.push_back(WideNumElts + 1);

SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);		SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);		return DAG.getVectorShuffle(WideVT, DL, Conv, Op2, ShuffV);
}		}

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when		/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.		/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {		SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// Not FP? Not a fsel.		// Not FP? Not a fsel.
if (!Op.getOperand(0).getValueType().isFloatingPoint() \|\|		if (!Op.getOperand(0).getValueType().isFloatingPoint() \|\|
!Op.getOperand(2).getValueType().isFloatingPoint())		!Op.getOperand(2).getValueType().isFloatingPoint())
▲ Show 20 Lines • Show All 2,994 Lines • ▼ Show 20 Lines	if (N->getOperand(0).getValueType() == MVT::ppcf128)
return;		return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));		Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;		return;
case ISD::TRUNCATE: {		case ISD::TRUNCATE: {
EVT TrgVT = N->getValueType(0);		EVT TrgVT = N->getValueType(0);
EVT OpVT = N->getOperand(0).getValueType();		EVT OpVT = N->getOperand(0).getValueType();
if (TrgVT.isVector() &&		if (TrgVT.isVector() &&
isOperationCustom(N->getOpcode(), TrgVT) &&		isOperationCustom(N->getOpcode(), TrgVT) &&
OpVT.getSizeInBits() <= 128 &&		OpVT.getSizeInBits() <= 256 &&
		isPowerOf2_32(TrgVT.getVectorElementType().getSizeInBits()) &&
		TrgVT.getSizeInBits() <= 128 &&
		nemanjaiUnsubmitted Not Done Reply Inline Actions We check that the number of elements in the wide input is a power of 2 as well as that the target element type width is a power of 2. What happens with something weird like `trunc <8 x i24> to <8 x i16>` (also, please add such a test case)? nemanjai: We check that the number of elements in the wide input is a power of 2 as well as that the…
		RolandFAuthorUnsubmitted Done Reply Inline Actions Moved checks to lowering function. There was an existing check for source bit width, so 8x24 was skipped. Added some additional checking. Added an 8x24 test to the test case. RolandF: Moved checks to lowering function. There was an existing check for source bit width, so 8x24…
isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits()))		isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits()))
Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));		Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
return;		return;
}		}
case ISD::BITCAST:		case ISD::BITCAST:
// Don't handle bitcast here.		// Don't handle bitcast here.
return;		return;
}		}
▲ Show 20 Lines • Show All 5,317 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/vec-trunc2.ll

This file was added.

				; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
				; RUN: -mattr=+vsx -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s \| \
				; RUN: FileCheck %s
				; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
				; RUN: -mattr=+vsx -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s \| \
				; RUN: FileCheck %s --check-prefix=CHECK-BE

				define dso_local <8 x i8> @test8x32(<8 x i32> %v1) {
				; CHECK-LABEL: .LCPI0_0:
				; CHECK-NEXT: byte 31
				; CHECK-NEXT: byte 27
				; CHECK-NEXT: byte 23
				; CHECK-NEXT: byte 19
				; CHECK-NEXT: byte 15
				; CHECK-NEXT: byte 11
				; CHECK-NEXT: byte 7
				; CHECK-NEXT: byte 3
				; CHECK-NEXT: byte 14
				qiucfUnsubmitted Done Reply Inline Actions Use `CHECK-COUNT`? qiucf: Use `CHECK-COUNT`?
				; CHECK-NEXT: byte 14
				; CHECK-NEXT: byte 14
				; CHECK-NEXT: byte 14
				; CHECK-NEXT: byte 14
				nemanjaiUnsubmitted Done Reply Inline Actions Please add a check that shows how we produced `v3`. Presumably this is a shift from `v2`? nemanjai: Please add a check that shows how we produced `v3`. Presumably this is a shift from `v2`?
				; CHECK-NEXT: byte 14
				; CHECK-NEXT: byte 14
				; CHECK-NEXT: byte 14
				; CHECK-LABEL: test8x32:
				; CHECK: lvx v4, 0, r3
				; CHECK: vperm v2, v3, v2, v4
				; CHECK: blr
				;
				; CHECK-BE-LABEL: .LCPI0_0:
				; CHECK-BE-NEXT: 3
				; CHECK-BE-NEXT: 7
				; CHECK-BE-NEXT: 11
				; CHECK-BE-NEXT: 15
				; CHECK-BE-NEXT: 19
				; CHECK-BE-NEXT: 23
				; CHECK-BE-NEXT: 27
				; CHECK-BE-NEXT: 31
				; CHECK-BE-NEXT: 17
				; CHECK-BE-NEXT: 17
				; CHECK-BE-NEXT: 17
				; CHECK-BE-NEXT: 17
				; CHECK-BE-NEXT: 17
				; CHECK-BE-NEXT: 17
				; CHECK-BE-NEXT: 17
				; CHECK-BE-NEXT: 17
				; CHECK-BE-LABEL: test8x32:
				; CHECK-BE: lxvw4x v4, 0, r3
				; CHECK-BE: vperm v2, v2, v3, v4
				; CHECK-BE: blr
				%v2 = trunc <8 x i32> %v1 to <8 x i8>
				ret <8 x i8> %v2
				}

				define dso_local <4 x i16> @test4x64(<4 x i64> %v1) {
				;CHECK-LABEL: .LCPI1_0:
				;CHECK-NEXT: byte 31
				;CHECK-NEXT: byte 30
				;CHECK-NEXT: byte 23
				;CHECK-NEXT: byte 22
				;CHECK-NEXT: byte 15
				;CHECK-NEXT: byte 14
				;CHECK-NEXT: byte 7
				;CHECK-NEXT: byte 6
				;CHECK-NEXT: byte 13
				nemanjaiUnsubmitted Not Done Reply Inline Actions It's bizarre that the script put the checks inside the signature of the function. nemanjai: It's bizarre that the script put the checks inside the signature of the function.
				;CHECK-NEXT: byte 12
				;CHECK-NEXT: byte 13
				;CHECK-NEXT: byte 12
				;CHECK-NEXT: byte 13
				;CHECK-NEXT: byte 12
				;CHECK-NEXT: byte 13
				;CHECK-NEXT: byte 12
				; CHECK-LABEL: test4x64:
				; CHECK: lvx v4, 0, r3
				; CHECK: vperm v2, v3, v2, v4
				; CHECK: blr
				;
				; CHECK-BE-LABEL: .LCPI1_0:
				; CHECK-BE-NEXT: 6
				; CHECK-BE-NEXT: 7
				; CHECK-BE-NEXT: 14
				; CHECK-BE-NEXT: 15
				; CHECK-BE-NEXT: 22
				; CHECK-BE-NEXT: 23
				; CHECK-BE-NEXT: 30
				; CHECK-BE-NEXT: 31
				; CHECK-BE-NEXT: 18
				; CHECK-BE-NEXT: 19
				; CHECK-BE-NEXT: 18
				; CHECK-BE-NEXT: 19
				; CHECK-BE-NEXT: 18
				; CHECK-BE-NEXT: 19
				; CHECK-BE-NEXT: 18
				; CHECK-BE-NEXT: 19
				; CHECK-BE-LABEL: test4x64:
				; CHECK-BE: lxvw4x v4, 0, r3
				; CHECK-BE: vperm v2, v2, v3, v4
				; CHECK-BE: blr
				%v2 = trunc <4 x i64> %v1 to <4 x i16>
				ret <4 x i16> %v2
				}

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC] Extend custom lower of vector truncate to handle wider input
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 222174

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/test/CodeGen/PowerPC/vec-trunc2.ll

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC] Extend custom lower of vector truncate to handle wider input
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 222174

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/test/CodeGen/PowerPC/vec-trunc2.ll

[PowerPC] Extend custom lower of vector truncate to handle wider input
ClosedPublic