Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -27189,6 +27189,19 @@ Known.Zero.setBitsFrom(InBitWidth); break; } + case X86ISD::CMOV: { + DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1); + // If we don't know any bits, early out. + if (!Known.One && !Known.Zero) + break; + KnownBits Known2; + DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1); + + // Only known if known in both the LHS and RHS. + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + break; + } } } @@ -27251,6 +27264,13 @@ case X86ISD::VPCOMU: // Vector compares return zero/all-bits result values. return VTBits; + + case X86ISD::CMOV: { + unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp0 == 1) return 1; // Early out. + unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1); + return std::min(Tmp0, Tmp1); + } } // Fallback case. @@ -31192,15 +31212,38 @@ const X86Subtarget &Subtarget) { SDLoc DL(N); - // If the flag operand isn't dead, don't touch this CMOV. - if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty()) - return SDValue(); - SDValue FalseOp = N->getOperand(0); SDValue TrueOp = N->getOperand(1); X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); SDValue Cond = N->getOperand(3); + // Try to promote 16-bit CMOVs. 
+  if (N->getValueType(0) == MVT::i16 && !MayFoldLoad(N->getOperand(0)) &&
+      !MayFoldLoad(N->getOperand(1))) {
+    if (isa<ConstantSDNode>(FalseOp))
+      FalseOp = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, FalseOp);
+    else
+      FalseOp = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, FalseOp);
+    DCI.AddToWorklist(FalseOp.getNode());
+    if (isa<ConstantSDNode>(TrueOp))
+      TrueOp = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, TrueOp);
+    else
+      TrueOp = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, TrueOp);
+    DCI.AddToWorklist(TrueOp.getNode()); // Queue the TrueOp extend as well.
+    SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL,
+                               DAG.getVTList(MVT::i32, MVT::Glue), FalseOp,
+                               TrueOp, N->getOperand(2), Cond);
+    DCI.AddToWorklist(Cmov.getNode());
+    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Cmov);
+    if (N->getNumValues() == 2)
+      return DCI.CombineTo(N, Trunc, Cmov.getValue(1));
+    return Trunc;
+  }
+
+  // If the flag operand isn't dead, don't touch this CMOV.
+  if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
+    return SDValue();
+
   if (CC == X86::COND_E || CC == X86::COND_NE) {
     switch (Cond.getOpcode()) {
     default: break;
@@ -32465,6 +32508,24 @@
       return SDValue(); // This routine will use CombineTo to replace N.
   }
 
+  // Try to remove unneeded cmov input bits if possible. This is to work
+  // around not having SimplifyDemandedBits for target nodes.
+  if (N->getOperand(0).getOpcode() == X86ISD::CMOV &&
+      isa<ConstantSDNode>(N->getOperand(1))) {
+    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+    const APInt &DemandedMask = cast<ConstantSDNode>(N1)->getAPIntValue();
+    SDValue LHS = DAG.GetDemandedBits(N0.getOperand(0), DemandedMask);
+    SDValue RHS = DAG.GetDemandedBits(N0.getOperand(1), DemandedMask);
+    if (LHS || RHS) {
+      // N must still mask the result: rebuild N over the narrowed CMOV.
+      SDValue Ops[] = {LHS ? LHS : N0.getOperand(0),
+                       RHS ? RHS : N0.getOperand(1), N0.getOperand(2),
+                       N0.getOperand(3)};
+      SDValue Sel = DAG.getNode(X86ISD::CMOV, SDLoc(N0), N0->getVTList(), Ops);
+      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Sel, N1);
+    }
+  }
+
   return SDValue();
 }
 
@@ -34775,47 +34836,6 @@
   return R.getValue(1);
 }
 
-// If we face {ANY,SIGN,ZERO}_EXTEND that is applied to a CMOV with constant
-// operands and the result of CMOV is not used anywhere else - promote CMOV
-// itself instead of promoting its result. This could be beneficial, because:
-// 1) X86TargetLowering::EmitLoweredSelect later can do merging of two
-// (or more) pseudo-CMOVs only when they go one-after-another and
-// getting rid of result extension code after CMOV will help that.
-// 2) Promotion of constant CMOV arguments is free, hence the
-// {ANY,SIGN,ZERO}_EXTEND will just be deleted.
-// 3) 16-bit CMOV encoding is 4 bytes, 32-bit CMOV is 3-byte, so this
-// promotion is also good in terms of code-size.
-// (64-bit CMOV is 4-bytes, that's why we don't do 32-bit => 64-bit
-// promotion).
-static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG) { - SDValue CMovN = Extend->getOperand(0); - if (CMovN.getOpcode() != X86ISD::CMOV) - return SDValue(); - - EVT TargetVT = Extend->getValueType(0); - unsigned ExtendOpcode = Extend->getOpcode(); - SDLoc DL(Extend); - - EVT VT = CMovN.getValueType(); - SDValue CMovOp0 = CMovN.getOperand(0); - SDValue CMovOp1 = CMovN.getOperand(1); - - bool DoPromoteCMOV = - (VT == MVT::i16 && (TargetVT == MVT::i32 || TargetVT == MVT::i64)) && - CMovN.hasOneUse() && - (isa(CMovOp0.getNode()) && - isa(CMovOp1.getNode())); - - if (!DoPromoteCMOV) - return SDValue(); - - CMovOp0 = DAG.getNode(ExtendOpcode, DL, TargetVT, CMovOp0); - CMovOp1 = DAG.getNode(ExtendOpcode, DL, TargetVT, CMovOp1); - - return DAG.getNode(X86ISD::CMOV, DL, TargetVT, CMovOp0, CMovOp1, - CMovN.getOperand(2), CMovN.getOperand(3)); -} - // Convert (vXiY *ext(vXi1 bitcast(iX))) to extend_in_reg(broadcast(iX)). // This is more or less the reverse of combineBitcastvxi1. static SDValue @@ -35019,9 +35039,6 @@ if (SDValue DivRem8 = getDivRem8(N, DAG)) return DivRem8; - if (SDValue NewCMov = combineToExtendCMOV(N, DAG)) - return NewCMov; - if (!DCI.isBeforeLegalizeOps()) { if (InVT == MVT::i1) { SDValue Zero = DAG.getConstant(0, DL, VT); @@ -35177,9 +35194,6 @@ } } - if (SDValue NewCMov = combineToExtendCMOV(N, DAG)) - return NewCMov; - if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget)) return V; Index: test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- test/CodeGen/X86/avx512-insert-extract.ll +++ test/CodeGen/X86/avx512-insert-extract.ll @@ -296,8 +296,9 @@ ; CHECK-NEXT: movb (%rdi), %al ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: movw $-1, %ax -; CHECK-NEXT: cmovew %cx, %ax +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq %x = load i1 , i1 * %addr, align 1 %x1 = 
insertelement <16 x i1> undef, i1 %x, i32 10 Index: test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512-mask-op.ll +++ test/CodeGen/X86/avx512-mask-op.ll @@ -880,9 +880,9 @@ ; KNL-LABEL: test15: ; KNL: ## BB#0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: movw $21845, %ax ## imm = 0x5555 -; KNL-NEXT: movw $1, %cx -; KNL-NEXT: cmovgw %ax, %cx +; KNL-NEXT: movl $21845, %eax ## imm = 0x5555 +; KNL-NEXT: movl $1, %ecx +; KNL-NEXT: cmovgl %eax, %ecx ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 @@ -892,9 +892,9 @@ ; SKX-LABEL: test15: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: movw $21845, %ax ## imm = 0x5555 -; SKX-NEXT: movw $1, %cx -; SKX-NEXT: cmovgw %ax, %cx +; SKX-NEXT: movl $21845, %eax ## imm = 0x5555 +; SKX-NEXT: movl $1, %ecx +; SKX-NEXT: cmovgl %eax, %ecx ; SKX-NEXT: kmovd %ecx, %k0 ; SKX-NEXT: vpmovm2b %k0, %xmm0 ; SKX-NEXT: retq @@ -902,9 +902,9 @@ ; AVX512BW-LABEL: test15: ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: cmpl %esi, %edi -; AVX512BW-NEXT: movw $21845, %ax ## imm = 0x5555 -; AVX512BW-NEXT: movw $1, %cx -; AVX512BW-NEXT: cmovgw %ax, %cx +; AVX512BW-NEXT: movl $21845, %eax ## imm = 0x5555 +; AVX512BW-NEXT: movl $1, %ecx +; AVX512BW-NEXT: cmovgl %eax, %ecx ; AVX512BW-NEXT: kmovd %ecx, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: ## kill: %XMM0 %XMM0 %ZMM0 @@ -914,9 +914,9 @@ ; AVX512DQ-LABEL: test15: ; AVX512DQ: ## BB#0: ; AVX512DQ-NEXT: cmpl %esi, %edi -; AVX512DQ-NEXT: movw $21845, %ax ## imm = 0x5555 -; AVX512DQ-NEXT: movw $1, %cx -; AVX512DQ-NEXT: cmovgw %ax, %cx +; AVX512DQ-NEXT: movl $21845, %eax ## imm = 0x5555 +; AVX512DQ-NEXT: movl $1, %ecx +; AVX512DQ-NEXT: cmovgl %eax, %ecx ; AVX512DQ-NEXT: kmovw %ecx, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 Index: test/CodeGen/X86/avx512-schedule.ll 
=================================================================== --- test/CodeGen/X86/avx512-schedule.ll +++ test/CodeGen/X86/avx512-schedule.ll @@ -5864,10 +5864,10 @@ ; CHECK-LABEL: vmov_test15: ; CHECK: # BB#0: ; CHECK-NEXT: cmpl %esi, %edi # sched: [1:0.25] -; CHECK-NEXT: movw $21845, %ax # imm = 0x5555 +; CHECK-NEXT: movl $21845, %eax # imm = 0x5555 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: movw $1, %cx # sched: [1:0.25] -; CHECK-NEXT: cmovgw %ax, %cx # sched: [1:1.00] +; CHECK-NEXT: movl $1, %ecx # sched: [1:0.25] +; CHECK-NEXT: cmovgl %eax, %ecx # sched: [1:1.00] ; CHECK-NEXT: kmovd %ecx, %k0 ; CHECK-NEXT: vpmovm2b %k0, %xmm0 ; CHECK-NEXT: retq # sched: [2:1.00] Index: test/CodeGen/X86/bool-simplify.ll =================================================================== --- test/CodeGen/X86/bool-simplify.ll +++ test/CodeGen/X86/bool-simplify.ll @@ -53,7 +53,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: rdrandw %cx -; CHECK-NEXT: cmovbw %di, %ax +; CHECK-NEXT: cmovbl %edi, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: retq @@ -105,7 +105,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: rdseedw %cx -; CHECK-NEXT: cmovbw %di, %ax +; CHECK-NEXT: cmovbl %edi, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: retq Index: test/CodeGen/X86/cmov-promotion.ll =================================================================== --- test/CodeGen/X86/cmov-promotion.ll +++ test/CodeGen/X86/cmov-promotion.ll @@ -115,7 +115,7 @@ ; CMOV-NEXT: testb $1, %dil ; CMOV-NEXT: movl $12414, %ecx # imm = 0x307E ; CMOV-NEXT: movl $65535, %eax # imm = 0xFFFF -; CMOV-NEXT: cmovneq %rcx, %rax +; CMOV-NEXT: cmovnel %ecx, %eax ; CMOV-NEXT: retq ; ; NO_CMOV-LABEL: cmov_zpromotion_16_to_64: @@ -269,9 +269,10 @@ ; CMOV-LABEL: cmov_spromotion_16_to_64: ; CMOV: # BB#0: ; CMOV-NEXT: testb $1, %dil -; CMOV-NEXT: movl $12414, %ecx # imm = 0x307E -; CMOV-NEXT: movq $-1, %rax -; 
CMOV-NEXT: cmovneq %rcx, %rax +; CMOV-NEXT: movl $12414, %eax # imm = 0x307E +; CMOV-NEXT: movl $-1, %ecx +; CMOV-NEXT: cmovnel %eax, %ecx +; CMOV-NEXT: movslq %ecx, %rax ; CMOV-NEXT: retq ; ; NO_CMOV-LABEL: cmov_spromotion_16_to_64: Index: test/CodeGen/X86/cmovcmov.ll =================================================================== --- test/CodeGen/X86/cmovcmov.ll +++ test/CodeGen/X86/cmovcmov.ll @@ -53,8 +53,7 @@ ; NOCMOV-NEXT: leal 12(%esp), %ecx ; NOCMOV-NEXT: [[TBB]]: ; NOCMOV-NEXT: movl (%ecx), %eax -; NOCMOV-NEXT: orl $4, %ecx -; NOCMOV-NEXT: movl (%ecx), %edx +; NOCMOV-NEXT: movl 4(%ecx), %edx ; NOCMOV-NEXT: retl define i64 @test_select_fcmp_oeq_i64(float %a, float %b, i64 %c, i64 %d) #0 { entry: @@ -82,8 +81,7 @@ ; NOCMOV-NEXT: leal 20(%esp), %ecx ; NOCMOV-NEXT: [[TBB]]: ; NOCMOV-NEXT: movl (%ecx), %eax -; NOCMOV-NEXT: orl $4, %ecx -; NOCMOV-NEXT: movl (%ecx), %edx +; NOCMOV-NEXT: movl 4(%ecx), %edx ; NOCMOV-NEXT: retl define i64 @test_select_fcmp_une_i64(float %a, float %b, i64 %c, i64 %d) #0 { entry: Index: test/CodeGen/X86/pr32282.ll =================================================================== --- test/CodeGen/X86/pr32282.ll +++ test/CodeGen/X86/pr32282.ll @@ -28,7 +28,6 @@ ; X86-NEXT: cmovnel %ecx, %edx ; X86-NEXT: cmovnel %eax, %ecx ; X86-NEXT: andl $-2, %edx -; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF ; X86-NEXT: addl $7, %edx ; X86-NEXT: adcxl %eax, %ecx ; X86-NEXT: pushl %ecx Index: test/CodeGen/X86/select.ll =================================================================== --- test/CodeGen/X86/select.ll +++ test/CodeGen/X86/select.ll @@ -968,8 +968,8 @@ ; GENERIC-NEXT: movl $32767, %eax ## imm = 0x7FFF ; GENERIC-NEXT: cmovlel %edi, %eax ; GENERIC-NEXT: cmpl $-32768, %eax ## imm = 0x8000 -; GENERIC-NEXT: movw $-32768, %cx ## imm = 0x8000 -; GENERIC-NEXT: cmovgew %ax, %cx +; GENERIC-NEXT: movl $-32768, %ecx ## imm = 0x8000 +; GENERIC-NEXT: cmovgel %eax, %ecx ; GENERIC-NEXT: movw %cx, (%rsi) ; GENERIC-NEXT: retq ; @@ -978,9 
+978,9 @@ ; ATOM-NEXT: cmpl $32767, %edi ## imm = 0x7FFF ; ATOM-NEXT: movl $32767, %eax ## imm = 0x7FFF ; ATOM-NEXT: cmovlel %edi, %eax -; ATOM-NEXT: movw $-32768, %cx ## imm = 0x8000 +; ATOM-NEXT: movl $-32768, %ecx ## imm = 0x8000 ; ATOM-NEXT: cmpl $-32768, %eax ## imm = 0x8000 -; ATOM-NEXT: cmovgew %ax, %cx +; ATOM-NEXT: cmovgel %eax, %ecx ; ATOM-NEXT: movw %cx, (%rsi) ; ATOM-NEXT: retq ; @@ -993,7 +993,7 @@ ; MCU-NEXT: movl %eax, %ecx ; MCU-NEXT: .LBB23_2: ; MCU-NEXT: cmpl $-32768, %ecx # imm = 0x8000 -; MCU-NEXT: movw $-32768, %ax # imm = 0x8000 +; MCU-NEXT: movl $-32768, %eax # imm = 0x8000 ; MCU-NEXT: jl .LBB23_4 ; MCU-NEXT: # BB#3: ; MCU-NEXT: movl %ecx, %eax @@ -1093,8 +1093,8 @@ ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: xorl $43, %eax ; CHECK-NEXT: testb $1, %sil -; CHECK-NEXT: cmovnew %ax, %di -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: cmovel %edi, %eax +; CHECK-NEXT: ## kill: %AX %AX %EAX ; CHECK-NEXT: retq ; ; MCU-LABEL: select_xor_1: Index: test/CodeGen/X86/select_const.ll =================================================================== --- test/CodeGen/X86/select_const.ll +++ test/CodeGen/X86/select_const.ll @@ -487,11 +487,10 @@ ; CHECK-LABEL: opaque_constant: ; CHECK: # BB#0: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: movl $23, %ecx +; CHECK-NEXT: movl $1, %ecx ; CHECK-NEXT: movq $-4, %rax ; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: movabsq $4294967297, %rcx # imm = 0x100000001 -; CHECK-NEXT: andq %rcx, %rax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: cmpq %rcx, %rsi ; CHECK-NEXT: sete %dl Index: test/CodeGen/X86/setcc-lowering.ll =================================================================== --- test/CodeGen/X86/setcc-lowering.ll +++ test/CodeGen/X86/setcc-lowering.ll @@ -76,7 +76,7 @@ ; KNL-32-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %eax ; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; KNL-32-NEXT: movw $-1, %dx +; KNL-32-NEXT: movl $-1, %edx ; KNL-32-NEXT: .p2align 4, 0x90 ; KNL-32-NEXT: .LBB1_1: # 
%for_loop599 ; KNL-32-NEXT: # =>This Inner Loop Header: Depth=1 @@ -84,7 +84,7 @@ ; KNL-32-NEXT: movl %eax, %esi ; KNL-32-NEXT: sbbl $0, %esi ; KNL-32-NEXT: movl $0, %esi -; KNL-32-NEXT: cmovlw %dx, %si +; KNL-32-NEXT: cmovll %edx, %esi ; KNL-32-NEXT: kmovw %esi, %k1 ; KNL-32-NEXT: kandw %k0, %k1, %k1 ; KNL-32-NEXT: kmovw %k1, %esi Index: test/CodeGen/X86/vector-compare-results.ll =================================================================== --- test/CodeGen/X86/vector-compare-results.ll +++ test/CodeGen/X86/vector-compare-results.ll @@ -5748,7 +5748,7 @@ ; AVX512BW-NEXT: vpextrw $1, %xmm5, %edx ; AVX512BW-NEXT: xorl %eax, %eax ; AVX512BW-NEXT: cmpw %cx, %dx -; AVX512BW-NEXT: movl $65535, %ecx # imm = 0xFFFF +; AVX512BW-NEXT: movl $-1, %ecx ; AVX512BW-NEXT: movl $0, %edx ; AVX512BW-NEXT: cmovgl %ecx, %edx ; AVX512BW-NEXT: vmovd %xmm4, %esi