Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7152,8 +7152,7 @@ SDLoc DL(Op); SDValue N1 = Op.getOperand(0); unsigned SrcSize = N1.getValueType().getSizeInBits(); - assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector"); - SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL); + assert(SrcSize <= 256 && "Source must fit in two Altivec/VSX vectors"); EVT TrgVT = Op.getValueType(); unsigned TrgNumElts = TrgVT.getVectorNumElements(); @@ -7161,6 +7160,23 @@ unsigned WideNumElts = 128 / EltVT.getSizeInBits(); EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts); + SDValue Op1, Op2; + if (SrcSize == 256) { + EVT VecIdxTy = + DAG.getTargetLoweringInfo().getVectorIdxTy(DAG.getDataLayout()); + EVT SrcEltVT = N1.getValueType().getVectorElementType(); + unsigned SplitNumElts = 128 / SrcEltVT.getSizeInBits(); + EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, SplitNumElts); + Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1, + DAG.getConstant(0, DL, VecIdxTy)); + Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1, + DAG.getConstant(SplitNumElts, DL, VecIdxTy)); + Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2); + } else { + Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL); + Op2 = DAG.getUNDEF(WideVT); + } + // First list the elements we want to keep. 
unsigned SizeMult = SrcSize / TrgVT.getSizeInBits(); SmallVector<int, 16> ShuffV; @@ -7176,8 +7192,8 @@ // ShuffV.push_back(i + WideNumElts); ShuffV.push_back(WideNumElts + 1); - SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc); - return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV); + SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1); + return DAG.getVectorShuffle(WideVT, DL, Conv, Op2, ShuffV); } /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when @@ -10188,7 +10204,9 @@ EVT OpVT = N->getOperand(0).getValueType(); if (TrgVT.isVector() && isOperationCustom(N->getOpcode(), TrgVT) && - OpVT.getSizeInBits() <= 128 && + OpVT.getSizeInBits() <= 256 && + isPowerOf2_32(TrgVT.getVectorElementType().getSizeInBits()) && + TrgVT.getSizeInBits() <= 128 && isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits())) Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG)); return; Index: llvm/test/CodeGen/PowerPC/vec-trunc.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec-trunc.ll +++ llvm/test/CodeGen/PowerPC/vec-trunc.ll @@ -195,3 +195,43 @@ store <2 x i16> %1, <2 x i16>* %Sink, align 16 ret void } + +define dso_local <8 x i8> @test8x32(<8 x i32> %v1) { +; CHECK-LABEL: test8x32: +; CHECK: # %bb.0: +; CHECK-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; CHECK-NEXT: addi r3, r3, .LCPI7_0@toc@l +; CHECK-NEXT: lvx v4, 0, r3 +; CHECK-NEXT: vperm v2, v3, v2, v4 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test8x32: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI7_0@toc@l +; CHECK-BE-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-NEXT: blr +%v2 = trunc <8 x i32> %v1 to <8 x i8> +ret <8 x i8> %v2 +} + +define dso_local <4 x i16> @test4x64(<4 x i64> %v1) { +; CHECK-LABEL: test4x64: +; CHECK: # %bb.0: +; CHECK-NEXT: addis r3, r2, .LCPI8_0@toc@ha +; CHECK-NEXT: addi r3, r3, .LCPI8_0@toc@l +; 
CHECK-NEXT: lvx v4, 0, r3 +; CHECK-NEXT: vperm v2, v3, v2, v4 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test4x64: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: addis r3, r2, .LCPI8_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI8_0@toc@l +; CHECK-BE-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-NEXT: blr +%v2 = trunc <4 x i64> %v1 to <4 x i16> +ret <4 x i16> %v2 +}