diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49867,6 +49867,41 @@
                           PMADDBuilder);
 }
 
+/// CMOV of constants requires materializing constant operands in registers.
+/// Try to fold those constants into an 'add' instruction to reduce instruction
+/// count. We do this with CMOV rather than the generic 'select' because
+/// earlier folds may turn select-of-constants into logic hacks.
+static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
+  // This checks for a zero operand because add-of-0 gets simplified away.
+  // TODO: Allow generating an extra add?
+  auto isSuitableCmov = [](SDValue V) {
+    if (V.getOpcode() != X86ISD::CMOV || !V.hasOneUse())
+      return false;
+    return isa<ConstantSDNode>(V.getOperand(0)) &&
+           isa<ConstantSDNode>(V.getOperand(1)) &&
+           (isNullConstant(V.getOperand(0)) || isNullConstant(V.getOperand(1)));
+  };
+
+  // Match a suitable CMOV as either operand of the (commutative) add.
+  SDValue Cmov = N->getOperand(0);
+  SDValue OtherOp = N->getOperand(1);
+  if (!isSuitableCmov(Cmov))
+    std::swap(Cmov, OtherOp);
+  if (!isSuitableCmov(Cmov))
+    return SDValue();
+
+  // add (cmov C, 0), OtherOp --> cmov (add OtherOp, C), OtherOp
+  // add (cmov 0, C), OtherOp --> cmov OtherOp, (add OtherOp, C)
+  SDLoc DL(N);
+  SDValue FalseOp = Cmov.getOperand(0);
+  SDValue TrueOp = Cmov.getOperand(1);
+  EVT VT = N->getValueType(0);
+  FalseOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, FalseOp);
+  TrueOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, TrueOp);
+  return DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp, Cmov.getOperand(2),
+                     Cmov.getOperand(3));
+}
+
 static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
@@ -49874,6 +49909,9 @@
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
 
+  if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG))
+    return Select;
+
   if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, SDLoc(N), VT, Subtarget))
     return MAdd;
   if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, SDLoc(N), VT, Subtarget))
diff --git a/llvm/test/CodeGen/X86/add-cmov.ll b/llvm/test/CodeGen/X86/add-cmov.ll
--- a/llvm/test/CodeGen/X86/add-cmov.ll
+++ b/llvm/test/CodeGen/X86/add-cmov.ll
@@ -4,11 +4,9 @@
 define i64 @select_consts_i64(i64 %offset, i32 %x) {
 ; CHECK-LABEL: select_consts_i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    leaq 42(%rdi), %rax
 ; CHECK-NEXT:    testl %esi, %esi
-; CHECK-NEXT:    movl $42, %eax
-; CHECK-NEXT:    cmovneq %rcx, %rax
-; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    cmovneq %rdi, %rax
 ; CHECK-NEXT:    retq
   %b = icmp eq i32 %x, 0
   %s = select i1 %b, i64 42, i64 0
@@ -19,11 +17,10 @@
 define i32 @select_consts_i32(i32 %offset, i64 %x) {
 ; CHECK-LABEL: select_consts_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal 43(%rdi), %eax
 ; CHECK-NEXT:    cmpq $42, %rsi
-; CHECK-NEXT:    movl $43, %eax
-; CHECK-NEXT:    cmovgel %ecx, %eax
-; CHECK-NEXT:    addl %edi, %eax
+; CHECK-NEXT:    cmovgel %edi, %eax
 ; CHECK-NEXT:    retq
   %b = icmp sgt i64 %x, 41
   %s = select i1 %b, i32 0, i32 43
@@ -34,11 +31,10 @@
 define i16 @select_consts_i16(i16 %offset, i1 %b) {
 ; CHECK-LABEL: select_consts_i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal 44(%rdi), %eax
 ; CHECK-NEXT:    testb $1, %sil
-; CHECK-NEXT:    movl $44, %eax
-; CHECK-NEXT:    cmovel %ecx, %eax
-; CHECK-NEXT:    addl %edi, %eax
+; CHECK-NEXT:    cmovel %edi, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %s = select i1 %b, i16 44, i16 0