Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -34109,6 +34109,41 @@
   return SDValue();
 }
 
+/// Detect patterns of truncation with signed saturation:
+/// (truncate (smin ((smax (x, signed_min_of_dest_type)),
+///                  signed_max_of_dest_type)) to dest_type)
+/// or:
+/// (truncate (smax ((smin (x, signed_max_of_dest_type)),
+///                  signed_min_of_dest_type)) to dest_type).
+/// Return the source value to be truncated or SDValue() if the pattern was not
+/// matched.
+static SDValue detectSSatPattern(SDValue In, EVT VT) {
+  unsigned NumDstBits = VT.getScalarSizeInBits();
+  unsigned NumSrcBits = In.getScalarValueSizeInBits();
+  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
+
+  auto MatchMinMax = [](SDValue V, unsigned Opcode, const APInt &Limit) {
+    APInt C;
+    if (V.getOpcode() == Opcode &&
+        ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) && C == Limit)
+      return V.getOperand(0);
+    return SDValue();
+  };
+
+  APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
+  APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
+
+  if (SDValue SMin = MatchMinMax(In, ISD::SMIN, SignedMax))
+    if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, SignedMin))
+      return SMax;
+
+  if (SDValue SMax = MatchMinMax(In, ISD::SMAX, SignedMin))
+    if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, SignedMax))
+      return SMin;
+
+  return SDValue();
+}
+
 /// Detect a pattern of truncation with saturation:
 /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
 /// The types should allow to use VPMOVUS* instruction on AVX512.
@@ -34121,15 +34156,18 @@
   return detectUSatPattern(In, VT);
 }
 
-static SDValue combineTruncateWithUSat(SDValue In, EVT VT, const SDLoc &DL,
-                                       SelectionDAG &DAG,
-                                       const X86Subtarget &Subtarget) {
+static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
+                                      SelectionDAG &DAG,
+                                      const X86Subtarget &Subtarget) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT))
     return SDValue();
-  if (auto USatVal = detectUSatPattern(In, VT))
-    if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
+  if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) {
+    if (auto SSatVal = detectSSatPattern(In, VT))
+      return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);
+    if (auto USatVal = detectUSatPattern(In, VT))
       return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
+  }
   return SDValue();
 }
 
@@ -35393,8 +35431,8 @@
   if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
     return Avg;
 
-  // Try to combine truncation with unsigned saturation.
-  if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
+  // Try to combine truncation with signed/unsigned saturation.
+  if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))
     return Val;
 
   // The bitcast source is a direct mmx result.
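
(Illustrative sketch, not part of the patch: the IR below is the kind of clamp-then-truncate input that detectSSatPattern is meant to recognize once the icmp/select pairs have been folded to SMAX/SMIN nodes, letting combineTruncateWithSat emit X86ISD::VTRUNCS. The function name, RUN line and CHECK lines are hypothetical; the expected vpmovsdw mirrors the trunc_ssat_v8i32_v8i16 output in the test diff below.)

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s

; Clamp to the signed i16 range with smax first, then smin (the first form in
; the detectSSatPattern comment), then truncate.
; CHECK-LABEL: ssat_trunc_v8i32_v8i16:
; CHECK: vpmovsdw %ymm0, %xmm0
define <8 x i16> @ssat_trunc_v8i32_v8i16(<8 x i32> %x) {
  %1 = icmp sgt <8 x i32> %x, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %2 = select <8 x i1> %1, <8 x i32> %x, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %3 = icmp slt <8 x i32> %2, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %5 = trunc <8 x i32> %4 to <8 x i16>
  ret <8 x i16> %5
}
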
Index: llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll
@@ -258,9 +258,7 @@
 ;
 ; AVX512VL-LABEL: trunc_ssat_v4i64_v4i32:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpmaxsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpmovqd %ymm0, %xmm0
+; AVX512VL-NEXT:    vpmovsqd %ymm0, %xmm0
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
@@ -278,9 +276,7 @@
 ;
 ; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i32:
 ; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmaxsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmovqd %ymm0, %xmm0
+; AVX512BWVL-NEXT:    vpmovsqd %ymm0, %xmm0
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
   %1 = icmp slt <4 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
@@ -705,9 +701,7 @@
 ;
 ; AVX512-LABEL: trunc_ssat_v8i64_v8i32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
+; AVX512-NEXT:    vpmovsqd %zmm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = icmp slt <8 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
   %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
@@ -1172,9 +1166,7 @@
 ;
 ; AVX512-LABEL: trunc_ssat_v8i64_v8i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
+; AVX512-NEXT:    vpmovsqw %zmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = icmp slt <8 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767>
@@ -1302,9 +1294,7 @@
 ;
 ; AVX512VL-LABEL: trunc_ssat_v8i32_v8i16:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
+; AVX512VL-NEXT:    vpmovsdw %ymm0, %xmm0
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
@@ -1321,9 +1311,7 @@
 ;
 ; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i16:
 ; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmovdw %ymm0, %xmm0
+; AVX512BWVL-NEXT:    vpmovsdw %ymm0, %xmm0
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
   %1 = icmp slt <8 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
@@ -1511,9 +1499,7 @@
 ;
 ; AVX512-LABEL: trunc_ssat_v16i32_v16i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
+; AVX512-NEXT:    vpmovsdw %zmm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = icmp slt <16 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
   %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
@@ -3231,9 +3217,7 @@
 ;
 ; AVX512-LABEL: trunc_ssat_v16i32_v16i8:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512-NEXT:    vpmovsdb %zmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = icmp slt <16 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
@@ -3344,9 +3328,7 @@
 ;
 ; AVX512BWVL-LABEL: trunc_ssat_v16i16_v16i8:
 ; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vpminsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmovwb %ymm0, %xmm0
+; AVX512BWVL-NEXT:    vpmovswb %ymm0, %xmm0
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
   %1 = icmp slt <16 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
@@ -3500,16 +3482,12 @@
 ;
 ; AVX512BW-LABEL: trunc_ssat_v32i16_v32i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpminsw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT:    vpmaxsw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512BWVL-LABEL: trunc_ssat_v32i16_v32i8:
 ; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vpminsw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BWVL-NEXT:    vpmaxsw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
+; AVX512BWVL-NEXT:    vpmovswb %zmm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
   %1 = icmp slt <32 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
   %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
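
(Also illustrative rather than part of the patch: detectSSatPattern accepts the clamp in either order, so the smin-first form below, which is the shape the trunc_ssat tests above use, is recognized as well. The reduced test is hypothetical; on an AVX512BW+VL target it should now compile to the single vpmovswb named in the CHECK line, as in the trunc_ssat_v16i16_v16i8 diff.)

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s

; smin(x, 127) first, then smax(..., -128): the second ordering listed in the
; detectSSatPattern comment above.
; CHECK-LABEL: ssat_trunc_v16i16_v16i8:
; CHECK: vpmovswb %ymm0, %xmm0
define <16 x i8> @ssat_trunc_v16i16_v16i8(<16 x i16> %x) {
  %1 = icmp slt <16 x i16> %x, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %2 = select <16 x i1> %1, <16 x i16> %x, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %3 = icmp sgt <16 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %5 = trunc <16 x i16> %4 to <16 x i8>
  ret <16 x i8> %5
}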