Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -31382,21 +31382,6 @@ return false; } -/// Return true if VPACK* instruction can be used for the given types -/// and it is avalable on \p Subtarget. -static bool -isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) { - if (Subtarget.hasSSE2()) - // v16i16 -> v16i8 - if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8) - return true; - if (Subtarget.hasSSE41()) - // v8i32 -> v8i16 - if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16) - return true; - return false; -} - /// Detect a pattern of truncation with saturation: /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). /// Return the source value to be truncated or SDValue() if the pattern was not @@ -31437,16 +31422,9 @@ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT)) return SDValue(); - SDValue USatVal = detectUSatPattern(In, VT); - if (USatVal) { + if (auto USatVal = detectUSatPattern(In, VT)) if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); - if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) { - SDValue Lo, Hi; - std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL); - return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi); - } - } return SDValue(); } Index: llvm/trunk/test/CodeGen/X86/avx-trunc.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-trunc.ll +++ llvm/trunk/test/CodeGen/X86/avx-trunc.ll @@ -40,28 +40,4 @@ ret <16 x i8> %B } -define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) { -; CHECK-LABEL: usat_trunc_wb_256: -; CHECK: # BB#0: -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper 
-; CHECK-NEXT: retq - %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> - %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> - %x6 = trunc <16 x i16> %x5 to <16 x i8> - ret <16 x i8> %x6 -} -define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) { -; CHECK-LABEL: usat_trunc_dw_256: -; CHECK: # BB#0: -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> - %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> - %x6 = trunc <8 x i32> %x5 to <8 x i16> - ret <8 x i16> %x6 -} Index: llvm/trunk/test/CodeGen/X86/avx512-trunc.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-trunc.ll +++ llvm/trunk/test/CodeGen/X86/avx512-trunc.ll @@ -505,8 +505,9 @@ define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) { ; KNL-LABEL: usat_trunc_wb_256_mem: ; KNL: ## BB#0: -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0 +; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 +; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vmovdqu %xmm0, (%rdi) ; KNL-NEXT: retq ; @@ -524,8 +525,9 @@ define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) { ; KNL-LABEL: usat_trunc_wb_256: ; KNL: ## BB#0: -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0 +; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 +; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: usat_trunc_wb_256: Index: llvm/trunk/test/CodeGen/X86/pr31773.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr31773.ll +++ llvm/trunk/test/CodeGen/X86/pr31773.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s + +; The purpose of this test is to ensure that vpackus* is not used for the umin+trunc combination, since vpackus* treats its input as signed. 
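+define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) { +; CHECK-LABEL: usat_trunc_wb_256: +; CHECK-NOT: vpackuswb %xmm1, %xmm0, %xmm0 + %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> + %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> + %x6 = trunc <16 x i16> %x5 to <16 x i8> + ret <16 x i8> %x6 +} + +define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) { +; CHECK-LABEL: usat_trunc_dw_256: +; CHECK-NOT: vpackusdw %xmm1, %xmm0, %xmm0 + %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> + %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> + %x6 = trunc <8 x i32> %x5 to <8 x i16> + ret <8 x i16> %x6 +}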