# Patch summary: add an AllowShrink parameter (default true) to
# ISD::isConstantSplatVector. When it is false, the element size is passed to
# BuildVectorSDNode::isConstantSplat as MinSplatBits. Four X86 call sites that
# test the splat value with isOneValue()/isMask() now pass false.
#
# NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
# Blank context lines and column alignment were inferred from the hunk line
# counts. The dyn_cast template argument and the vector constants in the .ll
# tests had been stripped; in pr34271.ll the value 16843009 (0x01010101) is
# implied by the ".zero 16,1" check, in pr34271-1.ll the same value is an
# assumption -- confirm against the upstream commit before applying.
Index: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -85,7 +85,10 @@
 
   /// If N is a BUILD_VECTOR node whose elements are all the same constant or
   /// undefined, return true and return the constant value in \p SplatValue.
-  bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
+  /// This sets \p SplatValue to the smallest possible splat unless AllowShrink
+  /// is set to false.
+  bool isConstantSplatVector(const SDNode *N, APInt &SplatValue,
+                             bool AllowShrink = true);
 
   /// Return true if the specified node is a BUILD_VECTOR where all of the
   /// elements are ~0 or undef.
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -116,7 +116,8 @@
 //                              ISD Namespace
 //===----------------------------------------------------------------------===//
 
-bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
+bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal,
+                                bool AllowShrink) {
   auto *BV = dyn_cast<BuildVectorSDNode>(N);
   if (!BV)
     return false;
@@ -124,9 +125,11 @@
   APInt SplatUndef;
   unsigned SplatBitSize;
   bool HasUndefs;
-  EVT EltVT = N->getValueType(0).getVectorElementType();
-  return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs) &&
-         EltVT.getSizeInBits() >= SplatBitSize;
+  unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+  unsigned MinSplatBits = AllowShrink ? 0 : EltSize;
+  return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
+                             MinSplatBits) &&
+         EltSize >= SplatBitSize;
 }
 
 // FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -29567,8 +29567,9 @@
   // In SetLT case, The second operand of the comparison can be either 1 or 0.
   APInt SplatVal;
   if ((CC == ISD::SETLT) &&
-      !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) &&
-         SplatVal == 1) ||
+      !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal,
+                                    /*AllowShrink*/false) &&
+         SplatVal.isOneValue()) ||
         (ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()))))
     return false;
 
@@ -32083,7 +32084,8 @@
     return SDValue();
 
   APInt SplatVal;
-  if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
+  if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal,
+                                  /*AllowShrink*/false) ||
       !SplatVal.isMask())
     return SDValue();
 
@@ -32667,7 +32669,8 @@
          "Unexpected types for truncate operation");
 
   APInt C;
-  if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
+  if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C,
+                                 /*AllowShrink*/false)) {
     // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
     // the element size of the destination type.
     return C.isMask(VT.getScalarSizeInBits()) ? In.getOperand(0) :
@@ -35374,7 +35377,8 @@
 
   SDNode *N1 = N->getOperand(1).getNode();
   APInt SplatVal;
-  if (!ISD::isConstantSplatVector(N1, SplatVal) || !SplatVal.isOneValue())
+  if (!ISD::isConstantSplatVector(N1, SplatVal, /*AllowShrink*/false) ||
+      !SplatVal.isOneValue())
     return SDValue();
 
   SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));
Index: llvm/trunk/test/CodeGen/X86/pr34271-1.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pr34271-1.ll
+++ llvm/trunk/test/CodeGen/X86/pr34271-1.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512vl,avx512bw | FileCheck %s
+
+define <16 x i16> @foo(<16 x i32> %i) {
+; CHECK-LABEL: foo:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpminud {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
+; CHECK-NEXT:    retq
+  %x3 = icmp ult <16 x i32> %i, <i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009>
+  %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009, i32 16843009>
+  %x6 = trunc <16 x i32> %x5 to <16 x i16>
+  ret <16 x i16> %x6
+}
Index: llvm/trunk/test/CodeGen/X86/pr34271.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pr34271.ll
+++ llvm/trunk/test/CodeGen/X86/pr34271.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+; CHECK: .LCPI0_0:
+; CHECK-NEXT: .zero 16,1
+
+define <4 x i32> @f(<4 x i32> %a) {
+; CHECK-LABEL: f:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    paddd .LCPI0_0(%rip), %xmm0
+; CHECK-NEXT:    retq
+  %v = add nuw nsw <4 x i32> %a, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
+  ret <4 x i32> %v
+}