Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9084,6 +9084,26 @@ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert); } + // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller + // than X. Just move the AssertZext in front of the truncate and drop the + // AssertSExt. + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::AssertSext && + Opcode == ISD::AssertZext) { + SDValue BigA = N0.getOperand(0); + EVT BigA_AssertVT = cast(BigA.getOperand(1))->getVT(); + assert(BigA_AssertVT.bitsLE(N0.getValueType()) && + "Asserting zero/sign-extended bits to a type larger than the " + "truncated destination does not provide information"); + + if (AssertVT.bitsLT(BigA_AssertVT)) { + SDLoc DL(N); + SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), + BigA.getOperand(0), N1); + return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert); + } + } + return SDValue(); } Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -35398,6 +35398,26 @@ return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N)); } + // Try to combine a PACKUSWB implemented truncate with a regular truncate to + // create a larger truncate. + // TODO: Match PACKSSWB as well? + if (Subtarget.hasAVX512() && Opcode == X86ISD::PACKUS && + N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 && + N0.getOperand(0).getValueType() == MVT::v8i32) { + + APInt ZeroMask = APInt::getHighBitsSet(16, 8); + if (DAG.MaskedValueIsZero(N0, ZeroMask)) { + if (Subtarget.hasVLX()) + return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0)); + + // Widen input to v16i32 so we can truncate that. + SDLoc dl(N); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i32, + N0.getOperand(0), DAG.getUNDEF(MVT::v8i32)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Concat); + } + } + // Attempt to combine as shuffle. SDValue Op(N, 0); if (SDValue Res = Index: llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll +++ llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll @@ -505,16 +505,14 @@ ; NOVL-LABEL: f64to8uc: ; NOVL: # %bb.0: ; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 -; NOVL-NEXT: vpmovdw %zmm0, %ymm0 -; NOVL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; NOVL-NEXT: vpmovdb %zmm0, %xmm0 ; NOVL-NEXT: vzeroupper ; NOVL-NEXT: retq ; ; VL-LABEL: f64to8uc: ; VL: # %bb.0: ; VL-NEXT: vcvttpd2dq %zmm0, %ymm0 -; VL-NEXT: vpmovdw %ymm0, %xmm0 -; VL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; VL-NEXT: vpmovdb %ymm0, %xmm0 ; VL-NEXT: vzeroupper ; VL-NEXT: retq %res = fptoui <8 x double> %f to <8 x i8>