Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7152,8 +7152,7 @@ SDLoc DL(Op); SDValue N1 = Op.getOperand(0); unsigned SrcSize = N1.getValueType().getSizeInBits(); - assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector"); - SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL); + assert(SrcSize <= 256 && "Source must fit in two Altivec/VSX vectors"); EVT TrgVT = Op.getValueType(); unsigned TrgNumElts = TrgVT.getVectorNumElements(); @@ -7161,6 +7160,24 @@ unsigned WideNumElts = 128 / EltVT.getSizeInBits(); EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts); + SDValue Op1, Op2; + if (SrcSize == 256) { + EVT VecIdxTy = + DAG.getTargetLoweringInfo().getVectorIdxTy(DAG.getDataLayout()); + EVT SrcEltVT = N1.getValueType().getVectorElementType(); + unsigned SplitNumElts = 128 / SrcEltVT.getSizeInBits(); + EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, SplitNumElts); + Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1, + DAG.getConstant(0, DL, VecIdxTy)); + Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1, + DAG.getConstant(SplitNumElts, DL, VecIdxTy)); + Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2); + } + else { + Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL); + Op2 = DAG.getUNDEF(WideVT); + } + // First list the elements we want to keep. 
unsigned SizeMult = SrcSize / TrgVT.getSizeInBits(); SmallVector<int, 16> ShuffV; @@ -7176,8 +7193,8 @@ // ShuffV.push_back(i + WideNumElts); ShuffV.push_back(WideNumElts + 1); - SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc); - return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV); + SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1); + return DAG.getVectorShuffle(WideVT, DL, Conv, Op2, ShuffV); } /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when @@ -10188,7 +10205,9 @@ EVT OpVT = N->getOperand(0).getValueType(); if (TrgVT.isVector() && isOperationCustom(N->getOpcode(), TrgVT) && - OpVT.getSizeInBits() <= 128 && + OpVT.getSizeInBits() <= 256 && + isPowerOf2_32(TrgVT.getVectorElementType().getSizeInBits()) && + TrgVT.getSizeInBits() <= 128 && isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits())) Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG)); return; Index: llvm/test/CodeGen/PowerPC/vec-trunc2.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/vec-trunc2.ll @@ -0,0 +1,102 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mattr=+vsx -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mattr=+vsx -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE + +define dso_local <8 x i8> @test8x32(<8 x i32> %v1) { +; CHECK-LABEL: .LCPI0_0: +; CHECK-NEXT: byte 31 +; CHECK-NEXT: byte 27 +; CHECK-NEXT: byte 23 +; CHECK-NEXT: byte 19 +; CHECK-NEXT: byte 15 +; CHECK-NEXT: byte 11 +; CHECK-NEXT: byte 7 +; CHECK-NEXT: byte 3 +; CHECK-NEXT: byte 14 +; CHECK-NEXT: byte 14 +; CHECK-NEXT: byte 14 +; CHECK-NEXT: byte 14 +; CHECK-NEXT: byte 14 +; CHECK-NEXT: byte 14 +; CHECK-NEXT: byte 14 +; CHECK-NEXT: byte 14 +; CHECK-LABEL: test8x32: +; CHECK: lvx v4, 0, r3 +; 
CHECK: vperm v2, v3, v2, v4 +; CHECK: blr +; +; CHECK-BE-LABEL: .LCPI0_0: +; CHECK-BE-NEXT: 3 +; CHECK-BE-NEXT: 7 +; CHECK-BE-NEXT: 11 +; CHECK-BE-NEXT: 15 +; CHECK-BE-NEXT: 19 +; CHECK-BE-NEXT: 23 +; CHECK-BE-NEXT: 27 +; CHECK-BE-NEXT: 31 +; CHECK-BE-NEXT: 17 +; CHECK-BE-NEXT: 17 +; CHECK-BE-NEXT: 17 +; CHECK-BE-NEXT: 17 +; CHECK-BE-NEXT: 17 +; CHECK-BE-NEXT: 17 +; CHECK-BE-NEXT: 17 +; CHECK-BE-NEXT: 17 +; CHECK-BE-LABEL: test8x32: +; CHECK-BE: lxvw4x v4, 0, r3 +; CHECK-BE: vperm v2, v2, v3, v4 +; CHECK-BE: blr +%v2 = trunc <8 x i32> %v1 to <8 x i8> +ret <8 x i8> %v2 +} + +define dso_local <4 x i16> @test4x64(<4 x i64> %v1) { +;CHECK-LABEL: .LCPI1_0: +;CHECK-NEXT: byte 31 +;CHECK-NEXT: byte 30 +;CHECK-NEXT: byte 23 +;CHECK-NEXT: byte 22 +;CHECK-NEXT: byte 15 +;CHECK-NEXT: byte 14 +;CHECK-NEXT: byte 7 +;CHECK-NEXT: byte 6 +;CHECK-NEXT: byte 13 +;CHECK-NEXT: byte 12 +;CHECK-NEXT: byte 13 +;CHECK-NEXT: byte 12 +;CHECK-NEXT: byte 13 +;CHECK-NEXT: byte 12 +;CHECK-NEXT: byte 13 +;CHECK-NEXT: byte 12 +; CHECK-LABEL: test4x64: +; CHECK: lvx v4, 0, r3 +; CHECK: vperm v2, v3, v2, v4 +; CHECK: blr +; +; CHECK-BE-LABEL: .LCPI1_0: +; CHECK-BE-NEXT: 6 +; CHECK-BE-NEXT: 7 +; CHECK-BE-NEXT: 14 +; CHECK-BE-NEXT: 15 +; CHECK-BE-NEXT: 22 +; CHECK-BE-NEXT: 23 +; CHECK-BE-NEXT: 30 +; CHECK-BE-NEXT: 31 +; CHECK-BE-NEXT: 18 +; CHECK-BE-NEXT: 19 +; CHECK-BE-NEXT: 18 +; CHECK-BE-NEXT: 19 +; CHECK-BE-NEXT: 18 +; CHECK-BE-NEXT: 19 +; CHECK-BE-NEXT: 18 +; CHECK-BE-NEXT: 19 +; CHECK-BE-LABEL: test4x64: +; CHECK-BE: lxvw4x v4, 0, r3 +; CHECK-BE: vperm v2, v2, v3, v4 +; CHECK-BE: blr +%v2 = trunc <4 x i64> %v1 to <4 x i16> +ret <4 x i16> %v2 +}