diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -856,6 +856,13 @@ return; } break; + case ISD::USHLSAT: + case ISD::SSHLSAT: + if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: // Expand the fpsosisat if it is scalable to prevent it from unrolling below. diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -9189,9 +9189,13 @@ assert(VT == RHS.getValueType() && "Expected operands to be the same type"); assert(VT.isInteger() && "Expected operands to be integers"); + if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return DAG.UnrollVectorOp(Node); + // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate. unsigned BW = VT.getScalarSizeInBits(); + EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS); SDValue Orig = DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS); @@ -9200,14 +9204,14 @@ if (IsSigned) { SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT); SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT); - SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT), - SatMin, SatMax, ISD::SETLT); + SDValue Cond = + DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT); + SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax); } else { SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT); } - Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE); - - return Result; + SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE); + return DAG.getSelect(dl, VT, Cond, SatVal, Result); } SDValue diff --git a/llvm/test/CodeGen/X86/sshl_sat_vec.ll b/llvm/test/CodeGen/X86/sshl_sat_vec.ll --- a/llvm/test/CodeGen/X86/sshl_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sshl_sat_vec.ll @@ -7,72 +7,42 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-LABEL: vec: ; X64: # %bb.0: -; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X64-NEXT: movd %xmm2, %eax -; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X64-NEXT: movd %xmm2, %ecx -; X64-NEXT: movl %eax, %edx -; X64-NEXT: shll %cl, %edx -; X64-NEXT: movl %edx, %esi -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: sarl %cl, %esi -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sets %cl -; X64-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF -; X64-NEXT: cmpl %esi, %eax -; X64-NEXT: cmovel %edx, %ecx -; X64-NEXT: movd %ecx, %xmm2 -; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] -; X64-NEXT: movd %xmm3, %eax -; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] -; X64-NEXT: movd %xmm3, %ecx -; X64-NEXT: movl %eax, %edx -; X64-NEXT: shll %cl, %edx -; X64-NEXT: movl %edx, %esi -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: sarl %cl, %esi -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sets %cl -; X64-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF -; X64-NEXT: cmpl %esi, %eax -; X64-NEXT: cmovel %edx, %ecx -; X64-NEXT: movd %ecx, %xmm3 -; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; X64-NEXT: movd %xmm0, %eax -; X64-NEXT: movd %xmm1, %ecx -; X64-NEXT: movl %eax, %edx -; X64-NEXT: shll %cl, %edx -; X64-NEXT: movl %edx, %esi -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: sarl %cl, %esi -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sets %cl -; X64-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF -; X64-NEXT: cmpl %esi, %eax -; X64-NEXT: cmovel %edx, %ecx -; X64-NEXT: movd %ecx, %xmm2 -; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] -; X64-NEXT: movd %xmm0, %eax -; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] -; X64-NEXT: movd %xmm0, %ecx -; X64-NEXT: movl %eax, %edx -; X64-NEXT: shll %cl, %edx -; X64-NEXT: movl %edx, %esi -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: sarl %cl, %esi -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: testl %eax, %eax -; X64-NEXT: sets %cl -; X64-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF -; X64-NEXT: cmpl %esi, %eax -; X64-NEXT: cmovel %edx, %ecx -; X64-NEXT: movd %ecx, %xmm0 -; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; X64-NEXT: movdqa %xmm2, %xmm0 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-NEXT: pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7] +; X64-NEXT: pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7] +; X64-NEXT: pslld $23, %xmm1 +; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X64-NEXT: cvttps2dq %xmm1, %xmm5 +; X64-NEXT: movdqa %xmm0, %xmm1 +; X64-NEXT: pmuludq %xmm5, %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,2,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] +; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] +; X64-NEXT: pmuludq %xmm7, %xmm5 +; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3] +; X64-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1] +; X64-NEXT: pshuflw {{.*#+}} xmm5 = xmm2[2,3,3,3,4,5,6,7] +; X64-NEXT: movdqa %xmm6, %xmm7 +; X64-NEXT: psrad %xmm5, %xmm7 +; X64-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,1,1,4,5,6,7] +; X64-NEXT: movdqa %xmm1, %xmm5 +; X64-NEXT: psrad %xmm2, %xmm5 +; X64-NEXT: punpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm7[1] +; X64-NEXT: movdqa %xmm6, %xmm2 +; X64-NEXT: psrad %xmm3, %xmm2 +; X64-NEXT: psrad %xmm4, %xmm1 +; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm5[0,3] +; X64-NEXT: pcmpeqd %xmm0, %xmm1 +; X64-NEXT: pand %xmm1, %xmm6 +; X64-NEXT: pxor %xmm2, %xmm2 +; X64-NEXT: pcmpgtd %xmm0, %xmm2 +; X64-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-NEXT: por %xmm2, %xmm0 +; X64-NEXT: pandn %xmm0, %xmm1 +; X64-NEXT: por %xmm6, %xmm1 +; X64-NEXT: movdqa %xmm1, %xmm0 ; X64-NEXT: retq ; ; X86-LABEL: vec: diff --git a/llvm/test/CodeGen/X86/ushl_sat_vec.ll b/llvm/test/CodeGen/X86/ushl_sat_vec.ll --- a/llvm/test/CodeGen/X86/ushl_sat_vec.ll +++ b/llvm/test/CodeGen/X86/ushl_sat_vec.ll @@ -7,57 +7,36 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-LABEL: vec: ; X64: # %bb.0: -; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X64-NEXT: movd %xmm2, %eax -; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X64-NEXT: movd %xmm2, %ecx -; X64-NEXT: movl %eax, %edx -; X64-NEXT: shll %cl, %edx -; X64-NEXT: movl %edx, %esi -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %esi -; X64-NEXT: cmpl %esi, %eax -; X64-NEXT: movl $-1, %eax -; X64-NEXT: cmovnel %eax, %edx -; X64-NEXT: movd %edx, %xmm2 -; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] -; X64-NEXT: movd %xmm3, %edx -; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] -; X64-NEXT: movd %xmm3, %ecx -; X64-NEXT: movl %edx, %esi -; X64-NEXT: shll %cl, %esi -; X64-NEXT: movl %esi, %edi -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %edi -; X64-NEXT: cmpl %edi, %edx -; X64-NEXT: cmovnel %eax, %esi -; X64-NEXT: movd %esi, %xmm3 -; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; X64-NEXT: movd %xmm0, %edx -; X64-NEXT: movd %xmm1, %ecx -; X64-NEXT: movl %edx, %esi -; X64-NEXT: shll %cl, %esi -; X64-NEXT: movl %esi, %edi -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %edi -; X64-NEXT: cmpl %edi, %edx -; X64-NEXT: cmovnel %eax, %esi -; X64-NEXT: movd %esi, %xmm2 -; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] -; X64-NEXT: movd %xmm0, %edx -; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] -; X64-NEXT: movd %xmm0, %ecx -; X64-NEXT: movl %edx, %esi -; X64-NEXT: shll %cl, %esi -; X64-NEXT: movl %esi, %edi -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %edi -; X64-NEXT: cmpl %edi, %edx -; X64-NEXT: cmovnel %eax, %esi -; X64-NEXT: movd %esi, %xmm0 -; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; X64-NEXT: movdqa %xmm2, %xmm0 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X64-NEXT: pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7] +; X64-NEXT: pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7] +; X64-NEXT: pslld $23, %xmm1 +; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X64-NEXT: cvttps2dq %xmm1, %xmm1 +; X64-NEXT: movdqa %xmm0, %xmm5 +; X64-NEXT: pmuludq %xmm1, %xmm5 +; X64-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,2,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X64-NEXT: pmuludq %xmm7, %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; X64-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm1[0],xmm6[1],xmm1[1] +; X64-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[2,3,3,3,4,5,6,7] +; X64-NEXT: movdqa %xmm6, %xmm7 +; X64-NEXT: psrld %xmm1, %xmm7 +; X64-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,1,1,1,4,5,6,7] +; X64-NEXT: movdqa %xmm5, %xmm2 +; X64-NEXT: psrld %xmm1, %xmm2 +; X64-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm7[1] +; X64-NEXT: movdqa %xmm6, %xmm1 +; X64-NEXT: psrld %xmm3, %xmm1 +; X64-NEXT: psrld %xmm4, %xmm5 +; X64-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm1[0] +; X64-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm2[0,3] +; X64-NEXT: pcmpeqd %xmm5, %xmm0 +; X64-NEXT: pcmpeqd %xmm1, %xmm1 +; X64-NEXT: pxor %xmm1, %xmm0 +; X64-NEXT: por %xmm6, %xmm0 ; X64-NEXT: retq ; ; X86-LABEL: vec: