Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -20535,7 +20535,9 @@
   // X86 doesn't have an i8 cmov. If both operands are the result of a truncate
   // widen the cmov and push the truncate through. This avoids introducing a new
   // branch during isel and doesn't add any extensions.
-  if (Op.getValueType() == MVT::i8 &&
+  // It would make sense to do this only when there is CMOV.
+  // Else, it should be best to leave the decision to the later code.
+  if (Subtarget.hasCMov() && Op.getValueType() == MVT::i8 &&
       Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
     SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
     if (T1.getValueType() == T2.getValueType() &&
@@ -20548,7 +20550,10 @@
   }
 
   // Promote i16 cmovs if it won't prevent folding a load.
-  if (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) && !MayFoldLoad(Op2)) {
+  // But it would make sense to do this only when there is CMOV.
+  // Else, it should be best to leave the decision to the later code.
+  if (Subtarget.hasCMov() && Op.getValueType() == MVT::i16 &&
+      !MayFoldLoad(Op1) && !MayFoldLoad(Op2)) {
     Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
     Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
     SDValue Ops[] = { Op2, Op1, CC, Cond };
Index: test/CodeGen/X86/fshl.ll
===================================================================
--- test/CodeGen/X86/fshl.ll
+++ test/CodeGen/X86/fshl.ll
@@ -69,10 +69,10 @@
 ; X86-SLOW:       # %bb.0:
 ; X86-SLOW-NEXT:    pushl %edi
 ; X86-SLOW-NEXT:    pushl %esi
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SLOW-NEXT:    movzwl {{[0-9]+}}(%esp), %esi
 ; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-SLOW-NEXT:    andb $15, %dl
-; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SLOW-NEXT:    movl %eax, %edi
 ; X86-SLOW-NEXT:    movl %edx, %ecx
 ; X86-SLOW-NEXT:    shll %cl, %edi
Index: test/CodeGen/X86/fshr.ll
===================================================================
--- test/CodeGen/X86/fshr.ll
+++ test/CodeGen/X86/fshr.ll
@@ -69,10 +69,10 @@
 ; X86-SLOW:       # %bb.0:
 ; X86-SLOW-NEXT:    pushl %edi
 ; X86-SLOW-NEXT:    pushl %esi
+; X86-SLOW-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-SLOW-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-SLOW-NEXT:    andb $15, %dl
-; X86-SLOW-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-SLOW-NEXT:    movl %eax, %edi
 ; X86-SLOW-NEXT:    movl %edx, %ecx
 ; X86-SLOW-NEXT:    shrl %cl, %edi
Index: test/CodeGen/X86/midpoint-int.ll
===================================================================
--- test/CodeGen/X86/midpoint-int.ll
+++ test/CodeGen/X86/midpoint-int.ll
@@ -730,32 +730,34 @@
 ;
 ; X32-LABEL: scalar_i16_signed_reg_reg:
 ; X32:       # %bb.0:
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    cmpw %ax, %cx
-; X32-NEXT:    setle %dl
-; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    xorl %ebx, %ebx
+; X32-NEXT:    cmpw %dx, %cx
+; X32-NEXT:    setle %bl
+; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    jg .LBB10_2
 ; X32-NEXT:  # %bb.1:
-; X32-NEXT:    movl %ecx, %esi
+; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:  .LBB10_2:
-; X32-NEXT:    leal -1(%edx,%edx), %edx
-; X32-NEXT:    movl %ecx, %edi
+; X32-NEXT:    leal -1(%ebx,%ebx), %esi
 ; X32-NEXT:    jge .LBB10_4
 ; X32-NEXT:  # %bb.3:
-; X32-NEXT:    movl %eax, %edi
+; X32-NEXT:    movl %edx, %eax
 ; X32-NEXT:  .LBB10_4:
-; X32-NEXT:    subl %esi, %edi
-; X32-NEXT:    movzwl %di, %eax
+; X32-NEXT:    subl %edi, %eax
+; X32-NEXT:    movzwl %ax, %eax
 ; X32-NEXT:    shrl %eax
-; X32-NEXT:    imull %edx, %eax
+; X32-NEXT:    imull %esi, %eax
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %edi
+; X32-NEXT:    popl %ebx
 ; X32-NEXT:    retl
   %t3 = icmp sgt i16 %a1, %a2 ; signed
   %t4 = select i1 %t3, i16 -1, i16 1
@@ -788,28 +790,34 @@
 ;
 ; X32-LABEL: scalar_i16_unsigned_reg_reg:
 ; X32:       # %bb.0:
+; X32-NEXT:    pushl %ebx
+; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    cmpw %ax, %cx
-; X32-NEXT:    setbe %dl
-; X32-NEXT:    leal -1(%edx,%edx), %edx
-; X32-NEXT:    ja .LBB11_1
-; X32-NEXT:  # %bb.2:
-; X32-NEXT:    movl %ecx, %esi
-; X32-NEXT:    jmp .LBB11_3
-; X32-NEXT:  .LBB11_1:
-; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:  .LBB11_3:
-; X32-NEXT:    subl %esi, %eax
+; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    xorl %ebx, %ebx
+; X32-NEXT:    cmpw %dx, %cx
+; X32-NEXT:    setbe %bl
+; X32-NEXT:    movl %edx, %edi
+; X32-NEXT:    ja .LBB11_2
+; X32-NEXT:  # %bb.1:
+; X32-NEXT:    movl %eax, %edi
+; X32-NEXT:  .LBB11_2:
+; X32-NEXT:    leal -1(%ebx,%ebx), %esi
+; X32-NEXT:    ja .LBB11_4
+; X32-NEXT:  # %bb.3:
+; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:  .LBB11_4:
+; X32-NEXT:    subl %edi, %eax
 ; X32-NEXT:    movzwl %ax, %eax
 ; X32-NEXT:    shrl %eax
-; X32-NEXT:    imull %edx, %eax
+; X32-NEXT:    imull %esi, %eax
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X32-NEXT:    popl %esi
+; X32-NEXT:    popl %edi
+; X32-NEXT:    popl %ebx
 ; X32-NEXT:    retl
   %t3 = icmp ugt i16 %a1, %a2
   %t4 = select i1 %t3, i16 -1, i16 1
@@ -845,33 +853,35 @@
 ;
 ; X32-LABEL: scalar_i16_signed_mem_reg:
 ; X32:       # %bb.0:
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
+; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movzwl (%ecx), %ecx
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    cmpw %ax, %cx
-; X32-NEXT:    setle %dl
-; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    movzwl (%eax), %ecx
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    xorl %ebx, %ebx
+; X32-NEXT:    cmpw %dx, %cx
+; X32-NEXT:    setle %bl
+; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    jg .LBB12_2
 ; X32-NEXT:  # %bb.1:
-; X32-NEXT:    movl %ecx, %esi
+; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:  .LBB12_2:
-; X32-NEXT:    leal -1(%edx,%edx), %edx
-; X32-NEXT:    movl %ecx, %edi
+; X32-NEXT:    leal -1(%ebx,%ebx), %esi
 ; X32-NEXT:    jge .LBB12_4
 ; X32-NEXT:  # %bb.3:
-; X32-NEXT:    movl %eax, %edi
+; X32-NEXT:    movl %edx, %eax
 ; X32-NEXT:  .LBB12_4:
-; X32-NEXT:    subl %esi, %edi
-; X32-NEXT:    movzwl %di, %eax
+; X32-NEXT:    subl %edi, %eax
+; X32-NEXT:    movzwl %ax, %eax
 ; X32-NEXT:    shrl %eax
-; X32-NEXT:    imull %edx, %eax
+; X32-NEXT:    imull %esi, %eax
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %edi
+; X32-NEXT:    popl %ebx
 ; X32-NEXT:    retl
   %a1 = load i16, i16* %a1_addr
   %t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -906,33 +916,35 @@
 ;
 ; X32-LABEL: scalar_i16_signed_reg_mem:
 ; X32:       # %bb.0:
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movzwl (%eax), %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    cmpw %ax, %cx
-; X32-NEXT:    setle %dl
-; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movzwl (%edx), %edx
+; X32-NEXT:    xorl %ebx, %ebx
+; X32-NEXT:    cmpw %dx, %cx
+; X32-NEXT:    setle %bl
+; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    jg .LBB13_2
 ; X32-NEXT:  # %bb.1:
-; X32-NEXT:    movl %ecx, %esi
+; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:  .LBB13_2:
-; X32-NEXT:    leal -1(%edx,%edx), %edx
-; X32-NEXT:    movl %ecx, %edi
+; X32-NEXT:    leal -1(%ebx,%ebx), %esi
 ; X32-NEXT:    jge .LBB13_4
 ; X32-NEXT:  # %bb.3:
-; X32-NEXT:    movl %eax, %edi
+; X32-NEXT:    movl %edx, %eax
 ; X32-NEXT:  .LBB13_4:
-; X32-NEXT:    subl %esi, %edi
-; X32-NEXT:    movzwl %di, %eax
+; X32-NEXT:    subl %edi, %eax
+; X32-NEXT:    movzwl %ax, %eax
 ; X32-NEXT:    shrl %eax
-; X32-NEXT:    imull %edx, %eax
+; X32-NEXT:    imull %esi, %eax
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %edi
+; X32-NEXT:    popl %ebx
 ; X32-NEXT:    retl
   %a2 = load i16, i16* %a2_addr
   %t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -968,34 +980,36 @@
 ;
 ; X32-LABEL: scalar_i16_signed_mem_mem:
 ; X32:       # %bb.0:
+; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movzwl (%ecx), %ecx
-; X32-NEXT:    movzwl (%eax), %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    cmpw %ax, %cx
-; X32-NEXT:    setle %dl
-; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    movzwl (%eax), %ecx
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    movzwl (%edx), %edx
+; X32-NEXT:    xorl %ebx, %ebx
+; X32-NEXT:    cmpw %dx, %cx
+; X32-NEXT:    setle %bl
+; X32-NEXT:    movl %edx, %edi
 ; X32-NEXT:    jg .LBB14_2
 ; X32-NEXT:  # %bb.1:
-; X32-NEXT:    movl %ecx, %esi
+; X32-NEXT:    movl %eax, %edi
 ; X32-NEXT:  .LBB14_2:
-; X32-NEXT:    leal -1(%edx,%edx), %edx
-; X32-NEXT:    movl %ecx, %edi
+; X32-NEXT:    leal -1(%ebx,%ebx), %esi
 ; X32-NEXT:    jge .LBB14_4
 ; X32-NEXT:  # %bb.3:
-; X32-NEXT:    movl %eax, %edi
+; X32-NEXT:    movl %edx, %eax
 ; X32-NEXT:  .LBB14_4:
-; X32-NEXT:    subl %esi, %edi
-; X32-NEXT:    movzwl %di, %eax
+; X32-NEXT:    subl %edi, %eax
+; X32-NEXT:    movzwl %ax, %eax
 ; X32-NEXT:    shrl %eax
-; X32-NEXT:    imull %edx, %eax
+; X32-NEXT:    imull %esi, %eax
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %edi
+; X32-NEXT:    popl %ebx
 ; X32-NEXT:    retl
   %a1 = load i16, i16* %a1_addr
   %a2 = load i16, i16* %a2_addr
Index: test/CodeGen/X86/select.ll
===================================================================
--- test/CodeGen/X86/select.ll
+++ test/CodeGen/X86/select.ll
@@ -1245,7 +1245,7 @@
 ; MCU-NEXT:    movl %eax, %ecx
 ; MCU-NEXT:  .LBB22_2:
 ; MCU-NEXT:    cmpl $-32768, %ecx # imm = 0x8000
-; MCU-NEXT:    movl $32768, %eax # imm = 0x8000
+; MCU-NEXT:    movw $-32768, %ax # imm = 0x8000
 ; MCU-NEXT:    jl .LBB22_4
 ; MCU-NEXT:  # %bb.3:
 ; MCU-NEXT:    movl %ecx, %eax