Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -20548,12 +20548,7 @@
   }
 
   // Or finally, promote i8/i16 cmovs if it won't prevent folding a load.
-  // FIXME: we should not limit promotion of i8 case to only when the CMOV is
-  //        legal, but EmitLoweredSelect() can not deail with these extensions
-  //        being inserted between two CMOV's. (in i16 case too TBN)
-  //        https://bugs.llvm.org/show_bug.cgi?id=40974
-  if (((Op.getValueType() == MVT::i8 && Subtarget.hasCMov()) ||
-       Op.getValueType() == MVT::i16) &&
+  if ((Op.getValueType() == MVT::i8 || Op.getValueType() == MVT::i16) &&
       !MayFoldLoad(Op1) && !MayFoldLoad(Op2)) {
     Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
     Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
@@ -28764,6 +28759,54 @@
   return SinkMBB;
 }
 
+/// Returns true if the instruction at \p It is accepted between two CMOV
+/// pseudos that are lowered together: it satisfies one of the MachineInstr
+/// predicates tested below, or its opcode is one of the listed X86 MOVZX/MOVSX.
+template <typename IterT>
+inline bool isHarmlessInstructionBetweenTwoCMOVPhis(IterT It) {
+  if (It->isDebugInstr() || It->isImplicitDef() || It->isExtractSubreg() ||
+      It->isInsertSubreg() || It->isSubregToReg() || It->isMoveReg() ||
+      It->isCopy())
+    return true;
+  // FIXME: can we instead simply accept all non-control-flow-altering instrs?
+  // NOTE: It->getOpcode() is a MachineInstr opcode, so only target (X86::)
+  // opcodes are listed here. ISD:: node types are SelectionDAG opcodes; they
+  // live in a different numbering space and must not be used as case labels.
+  switch (It->getOpcode()) {
+  case X86::MOVZX16rr16:
+  case X86::MOVZX16rr8:
+  case X86::MOVZX32rr16:
+  case X86::MOVZX32rr8:
+  case X86::MOVZX32rr8_NOREX:
+  case X86::MOVZX64rr16:
+  case X86::MOVZX64rr8:
+  case X86::MOVSX16rr16:
+  case X86::MOVSX16rr32:
+  case X86::MOVSX16rr8:
+  case X86::MOVSX32rr16:
+  case X86::MOVSX32rr32:
+  case X86::MOVSX32rr8:
+  case X86::MOVSX32rr8_NOREX:
+  case X86::MOVSX64rr16:
+  case X86::MOVSX64rr32:
+  case X86::MOVSX64rr8:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Advances \p It past every instruction accepted by
+/// isHarmlessInstructionBetweenTwoCMOVPhis() and returns the first position
+/// that is either \p End or an instruction that is not accepted.
+template <typename IterT>
+inline IterT skipHarmlessInstructionsBetweenTwoPhisForward(IterT It,
+                                                           IterT End) {
+  while (It != End && isHarmlessInstructionBetweenTwoCMOVPhis(It))
+    ++It;
+  return It;
+}
+
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
                                      MachineBasicBlock *ThisMBB) const {
@@ -28828,13 +28871,30 @@
 
   if (isCMOVPseudo(MI)) {
     // See if we have a string of CMOVS with the same condition. Skip over
-    // intervening debug insts.
+    // intervening harmless insts.
     while (NextMIIt != ThisMBB->end() && isCMOVPseudo(*NextMIIt) &&
            (NextMIIt->getOperand(3).getImm() == CC ||
-            NextMIIt->getOperand(3).getImm() == OppCC)) {
+            NextMIIt->getOperand(3).getImm() == OppCC) &&
+           // No instruction between the orig CMOV and this new CMOV (excl. all
+           // other CMOV's) may be the definition of op1 or op2 of new CMOV.
+           // Else moving this new CMOV will break def-use chain.
+           llvm::none_of(
+               llvm::make_range(MachineBasicBlock::iterator(MI), NextMIIt),
+               [&NextMIIt](MachineBasicBlock::iterator It) {
+                 if (isCMOVPseudo(*It))
+                   return false; // CMOV can reference previous CMOV's.
+                 return llvm::any_of(
+                     It->operands(), [&NextMIIt](const MachineOperand &Op) {
+                       if (!Op.isReg() || !Op.isDef())
+                         return false;
+                       return Op.getReg() == NextMIIt->getOperand(1).getReg() ||
+                              Op.getReg() == NextMIIt->getOperand(2).getReg();
+                     });
+               })) {
       LastCMOV = &*NextMIIt;
       ++NextMIIt;
-      NextMIIt = skipDebugInstructionsForward(NextMIIt, ThisMBB->end());
+      NextMIIt = skipHarmlessInstructionsBetweenTwoPhisForward(NextMIIt,
+                                                               ThisMBB->end());
     }
   }
 
@@ -28866,14 +28926,14 @@
     SinkMBB->addLiveIn(X86::EFLAGS);
   }
 
-  // Transfer any debug instructions inside the CMOV sequence to the sunk block.
- auto DbgEnd = MachineBasicBlock::iterator(LastCMOV); - auto DbgIt = MachineBasicBlock::iterator(MI); - while (DbgIt != DbgEnd) { - auto Next = std::next(DbgIt); - if (DbgIt->isDebugInstr()) - SinkMBB->push_back(DbgIt->removeFromParent()); - DbgIt = Next; + // Transfer any other instructions inside the CMOV sequence to the sunk block. + auto HarmlessEnd = MachineBasicBlock::iterator(LastCMOV); + auto HarmlessIt = MachineBasicBlock::iterator(MI); + while (HarmlessIt != HarmlessEnd) { + auto Next = std::next(HarmlessIt); + if (isHarmlessInstructionBetweenTwoCMOVPhis(HarmlessIt)) + SinkMBB->push_back(HarmlessIt->removeFromParent()); + HarmlessIt = Next; } // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. Index: test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll =================================================================== --- test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll +++ test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll @@ -89,7 +89,7 @@ ; I386-NOCMOV-NEXT: cmpb %cl, %al ; I386-NOCMOV-NEXT: jge .LBB1_2 ; I386-NOCMOV-NEXT: # %bb.1: -; I386-NOCMOV-NEXT: movl %ecx, %eax +; I386-NOCMOV-NEXT: movzbl %cl, %eax ; I386-NOCMOV-NEXT: .LBB1_2: ; I386-NOCMOV-NEXT: # kill: def $al killed $al killed $eax ; I386-NOCMOV-NEXT: retl @@ -115,7 +115,7 @@ ; I686-NOCMOV-NEXT: cmpb %cl, %al ; I686-NOCMOV-NEXT: jge .LBB1_2 ; I686-NOCMOV-NEXT: # %bb.1: -; I686-NOCMOV-NEXT: movl %ecx, %eax +; I686-NOCMOV-NEXT: movzbl %cl, %eax ; I686-NOCMOV-NEXT: .LBB1_2: ; I686-NOCMOV-NEXT: # kill: def $al killed $al killed $eax ; I686-NOCMOV-NEXT: retl @@ -227,7 +227,7 @@ define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounwind { ; I386-NOCMOV-LABEL: negative_CopyFromReg: ; I386-NOCMOV: # %bb.0: -; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al +; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx ; I386-NOCMOV-NEXT: addl {{[0-9]+}}(%esp), %ecx ; I386-NOCMOV-NEXT: cmpb %cl, %al @@ -235,6 +235,7 @@ ; 
I386-NOCMOV-NEXT: # %bb.1: ; I386-NOCMOV-NEXT: movl %ecx, %eax ; I386-NOCMOV-NEXT: .LBB3_2: +; I386-NOCMOV-NEXT: # kill: def $al killed $al killed $eax ; I386-NOCMOV-NEXT: retl ; ; I386-CMOV-LABEL: negative_CopyFromReg: @@ -249,7 +250,7 @@ ; ; I686-NOCMOV-LABEL: negative_CopyFromReg: ; I686-NOCMOV: # %bb.0: -; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al +; I686-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; I686-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx ; I686-NOCMOV-NEXT: addl {{[0-9]+}}(%esp), %ecx ; I686-NOCMOV-NEXT: cmpb %cl, %al @@ -257,6 +258,7 @@ ; I686-NOCMOV-NEXT: # %bb.1: ; I686-NOCMOV-NEXT: movl %ecx, %eax ; I686-NOCMOV-NEXT: .LBB3_2: +; I686-NOCMOV-NEXT: # kill: def $al killed $al killed $eax ; I686-NOCMOV-NEXT: retl ; ; I686-CMOV-LABEL: negative_CopyFromReg: @@ -290,13 +292,14 @@ define i8 @negative_CopyFromRegs(i32 %a1_wide, i32 %a2_wide) nounwind { ; I386-NOCMOV-LABEL: negative_CopyFromRegs: ; I386-NOCMOV: # %bb.0: -; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl -; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al +; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; I386-NOCMOV-NEXT: cmpb %cl, %al ; I386-NOCMOV-NEXT: jge .LBB4_2 ; I386-NOCMOV-NEXT: # %bb.1: ; I386-NOCMOV-NEXT: movl %ecx, %eax ; I386-NOCMOV-NEXT: .LBB4_2: +; I386-NOCMOV-NEXT: # kill: def $al killed $al killed $eax ; I386-NOCMOV-NEXT: retl ; ; I386-CMOV-LABEL: negative_CopyFromRegs: @@ -310,13 +313,14 @@ ; ; I686-NOCMOV-LABEL: negative_CopyFromRegs: ; I686-NOCMOV: # %bb.0: -; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl -; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al +; I686-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; I686-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; I686-NOCMOV-NEXT: cmpb %cl, %al ; I686-NOCMOV-NEXT: jge .LBB4_2 ; I686-NOCMOV-NEXT: # %bb.1: ; I686-NOCMOV-NEXT: movl %ecx, %eax ; I686-NOCMOV-NEXT: .LBB4_2: +; I686-NOCMOV-NEXT: # kill: def $al killed $al killed $eax ; I686-NOCMOV-NEXT: retl ; ; I686-CMOV-LABEL: negative_CopyFromRegs: 
Index: test/CodeGen/X86/cmov-promotion.ll =================================================================== --- test/CodeGen/X86/cmov-promotion.ll +++ test/CodeGen/X86/cmov-promotion.ll @@ -15,12 +15,11 @@ ; NO_CMOV-LABEL: cmov_zpromotion_8_to_16: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movb $117, %al +; NO_CMOV-NEXT: movl $117, %eax ; NO_CMOV-NEXT: jne .LBB0_2 ; NO_CMOV-NEXT: # %bb.1: -; NO_CMOV-NEXT: movb $-19, %al +; NO_CMOV-NEXT: movl $237, %eax ; NO_CMOV-NEXT: .LBB0_2: -; NO_CMOV-NEXT: movzbl %al, %eax ; NO_CMOV-NEXT: # kill: def $ax killed $ax killed $eax ; NO_CMOV-NEXT: retl %t0 = select i1 %c, i8 117, i8 -19 @@ -40,12 +39,11 @@ ; NO_CMOV-LABEL: cmov_zpromotion_8_to_32: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movb $126, %al +; NO_CMOV-NEXT: movl $126, %eax ; NO_CMOV-NEXT: jne .LBB1_2 ; NO_CMOV-NEXT: # %bb.1: -; NO_CMOV-NEXT: movb $-1, %al +; NO_CMOV-NEXT: movl $255, %eax ; NO_CMOV-NEXT: .LBB1_2: -; NO_CMOV-NEXT: movzbl %al, %eax ; NO_CMOV-NEXT: retl %t0 = select i1 %c, i8 12414, i8 -1 %ret = zext i8 %t0 to i32 @@ -64,12 +62,11 @@ ; NO_CMOV-LABEL: cmov_zpromotion_8_to_64: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movb $126, %al +; NO_CMOV-NEXT: movl $126, %eax ; NO_CMOV-NEXT: jne .LBB2_2 ; NO_CMOV-NEXT: # %bb.1: -; NO_CMOV-NEXT: movb $-1, %al +; NO_CMOV-NEXT: movl $255, %eax ; NO_CMOV-NEXT: .LBB2_2: -; NO_CMOV-NEXT: movzbl %al, %eax ; NO_CMOV-NEXT: xorl %edx, %edx ; NO_CMOV-NEXT: retl %t0 = select i1 %c, i8 12414, i8 -1 @@ -162,10 +159,10 @@ ; NO_CMOV-LABEL: cmov_spromotion_8_to_16: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movb $117, %al +; NO_CMOV-NEXT: movl $117, %eax ; NO_CMOV-NEXT: jne .LBB6_2 ; NO_CMOV-NEXT: # %bb.1: -; NO_CMOV-NEXT: movb $-19, %al +; NO_CMOV-NEXT: movl $237, %eax ; NO_CMOV-NEXT: .LBB6_2: ; NO_CMOV-NEXT: movsbl %al, %eax ; NO_CMOV-NEXT: # kill: def $ax killed $ax killed $eax @@ 
-188,10 +185,10 @@ ; NO_CMOV-LABEL: cmov_spromotion_8_to_32: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movb $126, %al +; NO_CMOV-NEXT: movl $126, %eax ; NO_CMOV-NEXT: jne .LBB7_2 ; NO_CMOV-NEXT: # %bb.1: -; NO_CMOV-NEXT: movb $-1, %al +; NO_CMOV-NEXT: movl $255, %eax ; NO_CMOV-NEXT: .LBB7_2: ; NO_CMOV-NEXT: movsbl %al, %eax ; NO_CMOV-NEXT: retl @@ -213,10 +210,10 @@ ; NO_CMOV-LABEL: cmov_spromotion_8_to_64: ; NO_CMOV: # %bb.0: ; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp) -; NO_CMOV-NEXT: movb $126, %al +; NO_CMOV-NEXT: movl $126, %eax ; NO_CMOV-NEXT: jne .LBB8_2 ; NO_CMOV-NEXT: # %bb.1: -; NO_CMOV-NEXT: movb $-1, %al +; NO_CMOV-NEXT: movl $255, %eax ; NO_CMOV-NEXT: .LBB8_2: ; NO_CMOV-NEXT: movsbl %al, %eax ; NO_CMOV-NEXT: movl %eax, %edx Index: test/CodeGen/X86/cmovcmov.ll =================================================================== --- test/CodeGen/X86/cmovcmov.ll +++ test/CodeGen/X86/cmovcmov.ll @@ -338,19 +338,19 @@ ; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; NOCMOV-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; NOCMOV-NEXT: movb $20, %al -; NOCMOV-NEXT: movb $20, %cl -; NOCMOV-NEXT: jge .LBB7_1 -; NOCMOV-NEXT: # %bb.2: # %entry +; NOCMOV-NEXT: jl .LBB7_2 +; NOCMOV-NEXT: # %bb.1: # %entry +; NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al +; NOCMOV-NEXT: .LBB7_2: # %entry +; NOCMOV-NEXT: movzbl %al, %ecx +; NOCMOV-NEXT: movl $20, %eax ; NOCMOV-NEXT: jle .LBB7_3 -; NOCMOV-NEXT: .LBB7_4: # %entry +; NOCMOV-NEXT: # %bb.4: # %entry ; NOCMOV-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; NOCMOV-NEXT: jne .LBB7_5 ; NOCMOV-NEXT: .LBB7_6: # %entry ; NOCMOV-NEXT: movb %al, g8 ; NOCMOV-NEXT: retl -; NOCMOV-NEXT: .LBB7_1: # %entry -; NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl -; NOCMOV-NEXT: jg .LBB7_4 ; NOCMOV-NEXT: .LBB7_3: # %entry ; NOCMOV-NEXT: movl %ecx, %eax ; NOCMOV-NEXT: cmpl $0, {{[0-9]+}}(%esp) Index: test/CodeGen/X86/copy-eflags.ll =================================================================== --- test/CodeGen/X86/copy-eflags.ll +++ 
test/CodeGen/X86/copy-eflags.ll @@ -205,16 +205,15 @@ ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: movb {{[0-9]+}}(%esp), %ch ; X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; X32-NEXT: movzbl {{[0-9]+}}(%esp), %esi ; X32-NEXT: jmp .LBB3_1 ; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: .LBB3_5: # %bb1 +; X32-NEXT: .LBB3_6: # %bb1 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: cltd ; X32-NEXT: idivl %edi ; X32-NEXT: .LBB3_1: # %bb1 @@ -225,25 +224,29 @@ ; X32-NEXT: cmpl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: sbbl %edx, %eax -; X32-NEXT: setl %al +; X32-NEXT: setge %al ; X32-NEXT: setl %dl ; X32-NEXT: movzbl %dl, %edi ; X32-NEXT: negl %edi ; X32-NEXT: testb %al, %al -; X32-NEXT: jne .LBB3_3 -; X32-NEXT: # %bb.2: # %bb1 +; X32-NEXT: je .LBB3_2 +; X32-NEXT: # %bb.3: # %bb1 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 -; X32-NEXT: movb %ch, %cl -; X32-NEXT: .LBB3_3: # %bb1 +; X32-NEXT: movl %esi, %ecx +; X32-NEXT: jmp .LBB3_4 +; X32-NEXT: .p2align 4, 0x90 +; X32-NEXT: .LBB3_2: # in Loop: Header=BB3_1 Depth=1 +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: .LBB3_4: # %bb1 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; X32-NEXT: movb %cl, (%ebp) ; X32-NEXT: movl (%ebx), %edx ; X32-NEXT: testb %al, %al -; X32-NEXT: jne .LBB3_5 -; X32-NEXT: # %bb.4: # %bb1 +; X32-NEXT: je .LBB3_6 +; X32-NEXT: # %bb.5: # %bb1 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; X32-NEXT: movl %edx, %edi -; X32-NEXT: jmp .LBB3_5 +; X32-NEXT: jmp .LBB3_6 ; ; X64-LABEL: PR37100: ; X64: # %bb.0: # %bb Index: test/CodeGen/X86/fshl.ll =================================================================== --- test/CodeGen/X86/fshl.ll +++ test/CodeGen/X86/fshl.ll @@ -16,22 +16,23 @@ define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind { ; X86-LABEL: 
var_shift_i8: ; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %ah -; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch ; X86-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-NEXT: andb $7, %dl -; X86-NEXT: movb %al, %ch -; X86-NEXT: movb %dl, %cl -; X86-NEXT: shlb %cl, %ch ; X86-NEXT: movb $8, %cl ; X86-NEXT: subb %dl, %cl -; X86-NEXT: shrb %cl, %ah +; X86-NEXT: shrb %cl, %ch +; X86-NEXT: movb %al, %dh +; X86-NEXT: movb %dl, %cl +; X86-NEXT: shlb %cl, %dh ; X86-NEXT: testb %dl, %dl ; X86-NEXT: je .LBB0_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: orb %ah, %ch -; X86-NEXT: movb %ch, %al +; X86-NEXT: orb %ch, %dh +; X86-NEXT: movzbl %dh, %eax ; X86-NEXT: .LBB0_2: +; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: var_shift_i8: Index: test/CodeGen/X86/fshr.ll =================================================================== --- test/CodeGen/X86/fshr.ll +++ test/CodeGen/X86/fshr.ll @@ -16,22 +16,23 @@ define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind { ; X86-LABEL: var_shift_i8: ; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %ah -; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch ; X86-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-NEXT: andb $7, %dl -; X86-NEXT: movb %al, %ch +; X86-NEXT: movb %al, %dh ; X86-NEXT: movb %dl, %cl -; X86-NEXT: shrb %cl, %ch +; X86-NEXT: shrb %cl, %dh ; X86-NEXT: movb $8, %cl ; X86-NEXT: subb %dl, %cl -; X86-NEXT: shlb %cl, %ah +; X86-NEXT: shlb %cl, %ch ; X86-NEXT: testb %dl, %dl ; X86-NEXT: je .LBB0_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: orb %ch, %ah -; X86-NEXT: movb %ah, %al +; X86-NEXT: orb %dh, %ch +; X86-NEXT: movzbl %ch, %eax ; X86-NEXT: .LBB0_2: +; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: var_shift_i8: Index: test/CodeGen/X86/midpoint-int.ll =================================================================== --- 
test/CodeGen/X86/midpoint-int.ll +++ test/CodeGen/X86/midpoint-int.ll @@ -1036,26 +1036,31 @@ ; ; X32-LABEL: scalar_i8_signed_reg_reg: ; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %ah -; X32-NEXT: movb {{[0-9]+}}(%esp), %cl -; X32-NEXT: cmpb %ah, %cl +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: cmpb %bl, %al ; X32-NEXT: setle %dl -; X32-NEXT: movb %ah, %ch +; X32-NEXT: movl %ebx, %esi ; X32-NEXT: jg .LBB15_2 ; X32-NEXT: # %bb.1: -; X32-NEXT: movb %cl, %ch +; X32-NEXT: movl %eax, %ebx ; X32-NEXT: .LBB15_2: -; X32-NEXT: movb %cl, %al ; X32-NEXT: jge .LBB15_4 ; X32-NEXT: # %bb.3: -; X32-NEXT: movb %ah, %al +; X32-NEXT: movl %esi, %eax ; X32-NEXT: .LBB15_4: -; X32-NEXT: subb %ch, %al ; X32-NEXT: addb %dl, %dl ; X32-NEXT: decb %dl +; X32-NEXT: subb %bl, %al ; X32-NEXT: shrb %al +; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: mulb %dl ; X32-NEXT: addb %cl, %al +; X32-NEXT: popl %esi +; X32-NEXT: popl %ebx ; X32-NEXT: retl %t3 = icmp sgt i8 %a1, %a2 ; signed %t4 = select i1 %t3, i8 -1, i8 1 @@ -1088,24 +1093,27 @@ ; ; X32-LABEL: scalar_i8_unsigned_reg_reg: ; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; X32-NEXT: pushl %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: cmpb %al, %cl ; X32-NEXT: setbe %dl ; X32-NEXT: ja .LBB16_1 ; X32-NEXT: # %bb.2: -; X32-NEXT: movb %cl, %ah +; X32-NEXT: movl %ecx, %ebx ; X32-NEXT: jmp .LBB16_3 ; X32-NEXT: .LBB16_1: -; X32-NEXT: movb %al, %ah -; X32-NEXT: movb %cl, %al +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: .LBB16_3: -; X32-NEXT: subb %ah, %al ; X32-NEXT: addb %dl, %dl ; X32-NEXT: decb %dl +; X32-NEXT: subb %bl, %al ; X32-NEXT: shrb %al +; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: mulb %dl ; X32-NEXT: addb %cl, %al +; X32-NEXT: popl %ebx ; 
X32-NEXT: retl %t3 = icmp ugt i8 %a1, %a2 %t4 = select i1 %t3, i8 -1, i8 1 @@ -1141,27 +1149,32 @@ ; ; X32-LABEL: scalar_i8_signed_mem_reg: ; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %ah -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movb (%ecx), %cl -; X32-NEXT: cmpb %ah, %cl +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movzbl (%eax), %eax +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: cmpb %bl, %al ; X32-NEXT: setle %dl -; X32-NEXT: movb %ah, %ch +; X32-NEXT: movl %ebx, %esi ; X32-NEXT: jg .LBB17_2 ; X32-NEXT: # %bb.1: -; X32-NEXT: movb %cl, %ch +; X32-NEXT: movl %eax, %ebx ; X32-NEXT: .LBB17_2: -; X32-NEXT: movb %cl, %al ; X32-NEXT: jge .LBB17_4 ; X32-NEXT: # %bb.3: -; X32-NEXT: movb %ah, %al +; X32-NEXT: movl %esi, %eax ; X32-NEXT: .LBB17_4: -; X32-NEXT: subb %ch, %al ; X32-NEXT: addb %dl, %dl ; X32-NEXT: decb %dl +; X32-NEXT: subb %bl, %al ; X32-NEXT: shrb %al +; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: mulb %dl ; X32-NEXT: addb %cl, %al +; X32-NEXT: popl %esi +; X32-NEXT: popl %ebx ; X32-NEXT: retl %a1 = load i8, i8* %a1_addr %t3 = icmp sgt i8 %a1, %a2 ; signed @@ -1195,27 +1208,30 @@ ; ; X32-LABEL: scalar_i8_signed_reg_mem: ; X32: # %bb.0: -; X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movb (%eax), %ah -; X32-NEXT: cmpb %ah, %cl +; X32-NEXT: movb %al, %dh +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movzbl (%ecx), %ebx +; X32-NEXT: cmpb %bl, %al ; X32-NEXT: setle %dl -; X32-NEXT: movb %ah, %ch +; X32-NEXT: movl %ebx, %ecx ; X32-NEXT: jg .LBB18_2 ; X32-NEXT: # %bb.1: -; X32-NEXT: movb %cl, %ch +; X32-NEXT: movl %eax, %ecx ; X32-NEXT: .LBB18_2: -; X32-NEXT: movb %cl, %al ; X32-NEXT: jge .LBB18_4 ; X32-NEXT: # %bb.3: -; X32-NEXT: movb %ah, %al +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: .LBB18_4: -; X32-NEXT: subb %ch, %al ; X32-NEXT: addb %dl, %dl ; X32-NEXT: 
decb %dl +; X32-NEXT: subb %cl, %al ; X32-NEXT: shrb %al +; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: mulb %dl -; X32-NEXT: addb %cl, %al +; X32-NEXT: addb %dh, %al +; X32-NEXT: popl %ebx ; X32-NEXT: retl %a2 = load i8, i8* %a2_addr %t3 = icmp sgt i8 %a1, %a2 ; signed @@ -1250,28 +1266,31 @@ ; ; X32-LABEL: scalar_i8_signed_mem_mem: ; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movb (%ecx), %cl -; X32-NEXT: movb (%eax), %ah -; X32-NEXT: cmpb %ah, %cl +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movzbl (%eax), %eax +; X32-NEXT: movb %al, %dh +; X32-NEXT: movzbl (%ecx), %ebx +; X32-NEXT: cmpb %bl, %al ; X32-NEXT: setle %dl -; X32-NEXT: movb %ah, %ch +; X32-NEXT: movl %ebx, %ecx ; X32-NEXT: jg .LBB19_2 ; X32-NEXT: # %bb.1: -; X32-NEXT: movb %cl, %ch +; X32-NEXT: movl %eax, %ecx ; X32-NEXT: .LBB19_2: -; X32-NEXT: movb %cl, %al ; X32-NEXT: jge .LBB19_4 ; X32-NEXT: # %bb.3: -; X32-NEXT: movb %ah, %al +; X32-NEXT: movl %ebx, %eax ; X32-NEXT: .LBB19_4: -; X32-NEXT: subb %ch, %al ; X32-NEXT: addb %dl, %dl ; X32-NEXT: decb %dl +; X32-NEXT: subb %cl, %al ; X32-NEXT: shrb %al +; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: mulb %dl -; X32-NEXT: addb %cl, %al +; X32-NEXT: addb %dh, %al +; X32-NEXT: popl %ebx ; X32-NEXT: retl %a1 = load i8, i8* %a1_addr %a2 = load i8, i8* %a2_addr Index: test/CodeGen/X86/pseudo_cmov_lower.ll =================================================================== --- test/CodeGen/X86/pseudo_cmov_lower.ll +++ test/CodeGen/X86/pseudo_cmov_lower.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-linux-gnu -o - | FileCheck %s +; RUN: llc < %s -mtriple=i386-linux-gnu -o - | FileCheck %s ; This test checks that only a single js gets generated in the final code ; for lowering the CMOV pseudos that get created for this IR. @@ -223,7 +223,7 @@ ; tests those opcodes. 
; ; CHECK-LABEL: foo9: -; CHECK: ja +; CHECK-COUNT-119: ja ; CHECK-NOT: ja define void @foo9(i32 %v1, <8 x i1> %v12, <8 x i1> %v13, Index: test/CodeGen/X86/select.ll =================================================================== --- test/CodeGen/X86/select.ll +++ test/CodeGen/X86/select.ll @@ -1174,7 +1174,7 @@ ; MCU-NEXT: movl %eax, %ecx ; MCU-NEXT: .LBB21_2: ; MCU-NEXT: cmpl $-128, %ecx -; MCU-NEXT: movb $-128, %al +; MCU-NEXT: movl $128, %eax ; MCU-NEXT: jl .LBB21_4 ; MCU-NEXT: # %bb.3: ; MCU-NEXT: movl %ecx, %eax