diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8173,6 +8173,49 @@
   }
 }
 
+/// If a (v)select has a condition value that is a sign-bit test, try to smear
+/// the condition operand sign-bit across the value width and use it as a mask.
+///
+/// \p N is a (v)select whose true and false operands are constants or
+/// constant vectors (asserted below). Returns the replacement value, or an
+/// empty SDValue if neither sign-bit pattern matches.
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
+  SDValue Cond = N->getOperand(0);
+  SDValue C1 = N->getOperand(1);
+  SDValue C2 = N->getOperand(2);
+  assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
+         "Expected select-of-constants");
+
+  // The shift is created directly on the compared value using the select's
+  // result type, so bail out unless the two types already match.
+  EVT VT = N->getValueType(0);
+  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
+      VT != Cond.getOperand(0).getValueType())
+    return SDValue();
+
+  // The inverted-condition + commuted-select variants of these patterns are
+  // canonicalized to these forms in IR.
+  SDValue X = Cond.getOperand(0);
+  SDValue CondC = Cond.getOperand(1);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+  if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
+      isAllOnesOrAllOnesSplat(C2)) {
+    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
+    SDLoc DL(N);
+    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+    return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
+  }
+  if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
+    // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
+    SDLoc DL(N);
+    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+    return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
+  }
+  return SDValue();
+}
+
 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
   SDValue Cond = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -8248,6 +8291,9 @@
       SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
       return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
     }
+
+    if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+      return V;
   }
 
   return SDValue();
@@ -8623,6 +8669,9 @@
     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
   }
 
+  if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+    return V;
+
   // The general case for select-of-constants:
   // vselect Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
diff --git a/llvm/test/CodeGen/X86/select-sra.ll b/llvm/test/CodeGen/X86/select-sra.ll
--- a/llvm/test/CodeGen/X86/select-sra.ll
+++ b/llvm/test/CodeGen/X86/select-sra.ll
@@ -4,10 +4,9 @@
 define i8 @isnonneg_i8(i8 %x) {
 ; CHECK-LABEL: isnonneg_i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $42, %ecx
-; CHECK-NEXT: movl $255, %eax
-; CHECK-NEXT: cmovnsl %ecx, %eax
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: sarb $7, %al
+; CHECK-NEXT: orb $42, %al
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: retq
   %cond = icmp sgt i8 %x, -1
@@ -18,10 +17,9 @@
 define i16 @isnonneg_i16(i16 %x) {
 ; CHECK-LABEL: isnonneg_i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: testw %di, %di
-; CHECK-NEXT: movl $542, %ecx # imm = 0x21E
-; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF
-; CHECK-NEXT: cmovnsl %ecx, %eax
+; CHECK-NEXT: movswl %di, %eax
+; CHECK-NEXT: sarl $15, %eax
+; CHECK-NEXT: orl $542, %eax # imm = 0x21E
 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT: retq
   %cond = icmp sgt i16 %x, -1
@@ -32,10 +30,9 @@
 define i32 @isnonneg_i32(i32 %x) {
 ; CHECK-LABEL: isnonneg_i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: movl $-42, %ecx
-; CHECK-NEXT: movl $-1, %eax
-; CHECK-NEXT: cmovnsl %ecx, %eax
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: sarl $31, %eax
+; CHECK-NEXT: orl $-42, %eax
 ; CHECK-NEXT: retq
   %cond = icmp sgt i32 %x, -1
   %r = select i1 %cond, i32 -42, i32 -1
@@ -45,10 +42,9 @@
 define i64 @isnonneg_i64(i64 %x) {
 ; CHECK-LABEL: isnonneg_i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: movl $2342342, %ecx # imm = 0x23BDC6
-; CHECK-NEXT: movq $-1, %rax
-; CHECK-NEXT: cmovnsq %rcx, %rax
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: orq $2342342, %rax # imm = 0x23BDC6
 ; CHECK-NEXT: retq
   %cond = icmp sgt i64 %x, -1
   %r = select i1 %cond, i64 2342342, i64 -1
@@ -58,10 +54,10 @@
 define <16 x i8> @isnonneg_v16i8(<16 x i8> %x) {
 ; CHECK-LABEL: isnonneg_v16i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pcmpgtb %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pcmpgtb %xmm0, %xmm1
+; CHECK-NEXT: por {{.*}}(%rip), %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm0
 ; CHECK-NEXT: retq
   %cond = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %r = select <16 x i1> %cond, <16 x i8> , <16 x i8>
@@ -71,9 +67,7 @@
 define <8 x i16> @isnonneg_v8i16(<8 x i16> %x) {
 ; CHECK-LABEL: isnonneg_v8i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pcmpgtw %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: psraw $15, %xmm0
 ; CHECK-NEXT: por {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %cond = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -84,9 +78,7 @@
 define <4 x i32> @isnonneg_v4i32(<4 x i32> %x) {
 ; CHECK-LABEL: isnonneg_v4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
 ; CHECK-NEXT: por {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %cond = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -97,18 +89,8 @@
 define <2 x i64> @isnonneg_v2i64(<2 x i64> %x) {
 ; CHECK-LABEL: isnonneg_v2i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: pxor {{.*}}(%rip), %xmm0
-; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [18446744071562067967,18446744071562067967]
-; CHECK-NEXT: movdqa %xmm0, %xmm2
-; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-NEXT: pand %xmm3, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
 ; CHECK-NEXT: por {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %cond = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
@@ -182,10 +164,8 @@
 define <8 x i16> @isneg_v8i16(<8 x i16> %x) {
 ; CHECK-LABEL: isneg_v8i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pcmpgtw %xmm0, %xmm1
-; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: psraw $15, %xmm0
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %cond = icmp slt <8 x i16> %x, zeroinitializer
   %r = select <8 x i1> %cond, <8 x i16> , <8 x i16> zeroinitializer
@@ -195,10 +175,8 @@
 define <4 x i32> @isneg_v4i32(<4 x i32> %x) {
 ; CHECK-LABEL: isneg_v4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
-; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %cond = icmp slt <4 x i32> %x, zeroinitializer
   %r = select <4 x i1> %cond, <4 x i32> , <4 x i32> zeroinitializer
@@ -208,15 +186,8 @@
 define <2 x i64> @isneg_v2i64(<2 x i64> %x) {
 ; CHECK-LABEL: isneg_v2i64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
-; CHECK-NEXT: pxor %xmm1, %xmm0
-; CHECK-NEXT: movdqa %xmm1, %xmm2
-; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT: pand %xmm2, %xmm1
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
   %cond = icmp slt <2 x i64> %x, zeroinitializer