Index: lib/Target/X86/X86InstrArithmetic.td
===================================================================
--- lib/Target/X86/X86InstrArithmetic.td
+++ lib/Target/X86/X86InstrArithmetic.td
@@ -913,8 +913,8 @@
   let Defs = [EFLAGS] in {
     let Constraints = "$src1 = $dst" in {
       let isCommutable = CommutableRR in {
-        def NAME#8rr  : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
         let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+          def NAME#8rr  : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
           def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
           def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
           def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
@@ -931,9 +931,9 @@
       def NAME#32rm  : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
       def NAME#64rm  : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
 
-      def NAME#8ri   : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
-
       let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+        def NAME#8ri   : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+
         // NOTE: These are order specific, we want the ri8 forms to be listed
         // first so that they are slightly preferred to the ri forms.
         def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
Index: lib/Target/X86/X86InstrInfo.h
===================================================================
--- lib/Target/X86/X86InstrInfo.h
+++ lib/Target/X86/X86InstrInfo.h
@@ -584,10 +584,12 @@
                                  const MachineOperand *&Destination) const override;
 
 private:
-  MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc,
-                                             MachineFunction::iterator &MFI,
-                                             MachineInstr &MI,
-                                             LiveVariables *LV) const;
+  /// Helper for convertToThreeAddress when 16-bit LEA is disabled or for
+  /// 8-bit ops. Use 32-bit LEA to form 3-address code by promoting to a 32-bit
+  /// superregister and then truncating back down to an 8/16-bit subregister.
+  MachineInstr *convertToThreeAddressWithLEA(
+      unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
+      LiveVariables *LV, bool Is16BitOp = true) const;
 
   /// Handles memory folding for special case instructions, for instance those
   /// requiring custom manipulation of the address.
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -794,12 +794,9 @@
   return true;
 }
 
-/// Helper for convertToThreeAddress when 16-bit LEA is disabled, use 32-bit
-/// LEA to form 3-address code by promoting to a 32-bit superregister and then
-/// truncating back down to a 16-bit subregister.
 MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
     unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
-    LiveVariables *LV) const {
+    LiveVariables *LV, bool Is16BitOp) const {
   MachineBasicBlock::iterator MBBI = MI.getIterator();
   unsigned Dest = MI.getOperand(0).getReg();
   unsigned Src = MI.getOperand(1).getReg();
@@ -819,7 +816,7 @@
   }
 
   // Build and insert into an implicit UNDEF value. This is OK because
-  // well be shifting and then extracting the lower 16-bits.
+  // we will be shifting and then extracting the lower 8/16-bits.
   // This has the potential to cause partial register stall. e.g.
   //   movw    (%rbp,%rcx,2), %dx
   //   leal    -65(%rdx), %esi
@@ -828,7 +825,8 @@
   BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
   MachineInstr *InsMI =
       BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
-          .addReg(leaInReg, RegState::Define, X86::sub_16bit)
+          .addReg(leaInReg, RegState::Define,
+                  Is16BitOp ? X86::sub_16bit : X86::sub_8bit)
           .addReg(Src, getKillRegState(isKill));
 
   MachineInstrBuilder MIB =
@@ -847,12 +845,14 @@
   case X86::DEC16r:
     addRegOffset(MIB, leaInReg, true, -1);
     break;
+  case X86::ADD8ri:
   case X86::ADD16ri:
   case X86::ADD16ri8:
   case X86::ADD16ri_DB:
   case X86::ADD16ri8_DB:
     addRegOffset(MIB, leaInReg, true, MI.getOperand(2).getImm());
     break;
+  case X86::ADD8rr:
   case X86::ADD16rr:
   case X86::ADD16rr_DB: {
     unsigned Src2 = MI.getOperand(2).getReg();
@@ -870,10 +870,11 @@
     else
       leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
     // Build and insert into an implicit UNDEF value. This is OK because
-    // well be shifting and then extracting the lower 16-bits.
+    // we will be shifting and then extracting the lower 8/16-bits.
     BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
     InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
-                 .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
+                 .addReg(leaInReg2, RegState::Define,
+                         Is16BitOp ? X86::sub_16bit : X86::sub_8bit)
                  .addReg(Src2, getKillRegState(isKill2));
     addRegReg(MIB, leaInReg, true, leaInReg2, true);
   }
@@ -887,7 +888,8 @@
   MachineInstr *ExtMI =
       BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
           .addReg(Dest, RegState::Define | getDeadRegState(isDead))
-          .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
+          .addReg(leaOutReg, RegState::Kill,
+                  Is16BitOp ? X86::sub_16bit : X86::sub_8bit);
 
   if (LV) {
     // Update live variables
@@ -1110,6 +1112,9 @@
       LV->replaceKillInstruction(SrcReg2, MI, *NewMI);
     break;
   }
+  case X86::ADD8rr:
+    return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, false)
+                   : nullptr;
   case X86::ADD16rr:
   case X86::ADD16rr_DB: {
     if (DisableLEA16)
@@ -1160,6 +1165,9 @@
     NewMI = addOffset(MIB, MI.getOperand(2));
     break;
   }
+  case X86::ADD8ri:
+    return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, false)
+                   : nullptr;
   case X86::ADD16ri:
   case X86::ADD16ri8:
   case X86::ADD16ri_DB:
Index: test/CodeGen/X86/GlobalISel/add-scalar.ll
===================================================================
--- test/CodeGen/X86/GlobalISel/add-scalar.ll
+++ test/CodeGen/X86/GlobalISel/add-scalar.ll
@@ -57,8 +57,9 @@
 define i8 @test_add_i8(i8 %arg1, i8 %arg2) {
 ; X64-LABEL: test_add_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    leal (%rsi,%rdi), %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 ;
Index: test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll
===================================================================
--- test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll
+++ test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll
@@ -11,8 +11,9 @@
 ; X64-LABEL: test_shl_i4:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    addb %sil, %cl
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    leal (%rdx,%rsi), %ecx
 ; X64-NEXT:    andb $15, %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shlb %cl, %al
Index: test/CodeGen/X86/GlobalISel/shl-scalar.ll
===================================================================
--- test/CodeGen/X86/GlobalISel/shl-scalar.ll
+++ test/CodeGen/X86/GlobalISel/shl-scalar.ll
@@ -147,8 +147,8 @@
 define i8 @test_shl_i8_imm1(i32 %arg1) {
 ; X64-LABEL: test_shl_i8_imm1:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    addb %al, %al
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rdi), %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %a = trunc i32 %arg1 to i8
Index: test/CodeGen/X86/fixup-bw-copy.ll
===================================================================
--- test/CodeGen/X86/fixup-bw-copy.ll
+++ test/CodeGen/X86/fixup-bw-copy.ll
@@ -43,9 +43,10 @@
 define i8 @test_movb_hreg(i16 %a0) {
 ; X64-LABEL: test_movb_hreg:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shrl $8, %eax
-; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    leal (%rax,%rdi), %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 ;
Index: test/CodeGen/X86/fshr.ll
===================================================================
--- test/CodeGen/X86/fshr.ll
+++ test/CodeGen/X86/fshr.ll
@@ -358,9 +358,9 @@
 ;
 ; X64-LABEL: const_shift_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    shrb $7, %sil
-; X64-NEXT:    addb %al, %al
+; X64-NEXT:    leal (%rdi,%rdi), %eax
 ; X64-NEXT:    orb %sil, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
Index: test/CodeGen/X86/iabs.ll
===================================================================
--- test/CodeGen/X86/iabs.ll
+++ test/CodeGen/X86/iabs.ll
@@ -21,10 +21,10 @@
 ;
 ; X64-LABEL: test_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    movl %edi, %ecx
 ; X64-NEXT:    sarb $7, %cl
-; X64-NEXT:    addb %cl, %al
+; X64-NEXT:    leal (%rdi,%rcx), %eax
 ; X64-NEXT:    xorb %cl, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
Index: test/CodeGen/X86/mul-constant-i8.ll
===================================================================
--- test/CodeGen/X86/mul-constant-i8.ll
+++ test/CodeGen/X86/mul-constant-i8.ll
@@ -14,8 +14,8 @@
 define i8 @test_mul_by_2(i8 %x) {
 ; X64-LABEL: test_mul_by_2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    addb %al, %al
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rdi), %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %m = mul i8 %x, 2
Index: test/CodeGen/X86/popcnt.ll
===================================================================
--- test/CodeGen/X86/popcnt.ll
+++ test/CodeGen/X86/popcnt.ll
@@ -25,6 +25,7 @@
 ;
 ; X64-LABEL: cnt8:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shrb %al
 ; X64-NEXT:    andb $85, %al
@@ -36,8 +37,9 @@
 ; X64-NEXT:    addb %al, %dil
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shrb $4, %al
-; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    leal (%rax,%rdi), %eax
 ; X64-NEXT:    andb $15, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 ;
 ; X32-POPCNT-LABEL: cnt8:
Index: test/CodeGen/X86/pr23664.ll
===================================================================
--- test/CodeGen/X86/pr23664.ll
+++ test/CodeGen/X86/pr23664.ll
@@ -4,8 +4,8 @@
 define i2 @f(i32 %arg) {
 ; CHECK-LABEL: f:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    addb %al, %al
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal (%rdi,%rdi), %eax
 ; CHECK-NEXT:    orb $1, %al
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
Index: test/CodeGen/X86/rotate4.ll
===================================================================
--- test/CodeGen/X86/rotate4.ll
+++ test/CodeGen/X86/rotate4.ll
@@ -642,9 +642,9 @@
 ;
 ; X64-LABEL: rotate_demanded_bits_3:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    addb %cl, %cl
+; X64-NEXT:    leal (%rsi,%rsi), %ecx
 ; X64-NEXT:    andb $30, %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    roll %cl, %eax
Index: test/CodeGen/X86/scheduler-backtracking.ll
===================================================================
--- test/CodeGen/X86/scheduler-backtracking.ll
+++ test/CodeGen/X86/scheduler-backtracking.ll
@@ -19,29 +19,28 @@
 ; ILP-NEXT:    incl %esi
 ; ILP-NEXT:    addb %sil, %sil
 ; ILP-NEXT:    orb $1, %sil
-; ILP-NEXT:    movl $1, %r9d
+; ILP-NEXT:    movl $1, %r10d
 ; ILP-NEXT:    xorl %r14d, %r14d
 ; ILP-NEXT:    movl %esi, %ecx
-; ILP-NEXT:    shldq %cl, %r9, %r14
+; ILP-NEXT:    shldq %cl, %r10, %r14
 ; ILP-NEXT:    movl $1, %edx
 ; ILP-NEXT:    shlq %cl, %rdx
-; ILP-NEXT:    movl %esi, %r11d
-; ILP-NEXT:    addb $-128, %r11b
-; ILP-NEXT:    movb $-128, %r10b
+; ILP-NEXT:    leal -128(%rsi), %r9d
+; ILP-NEXT:    movb $-128, %r11b
 ; ILP-NEXT:    xorl %ebx, %ebx
-; ILP-NEXT:    movl %r11d, %ecx
-; ILP-NEXT:    shldq %cl, %r9, %rbx
+; ILP-NEXT:    movl %r9d, %ecx
+; ILP-NEXT:    shldq %cl, %r10, %rbx
 ; ILP-NEXT:    testb $64, %sil
 ; ILP-NEXT:    cmovneq %rdx, %r14
 ; ILP-NEXT:    cmovneq %r8, %rdx
 ; ILP-NEXT:    movl $1, %edi
 ; ILP-NEXT:    shlq %cl, %rdi
-; ILP-NEXT:    subb %sil, %r10b
-; ILP-NEXT:    movl %r10d, %ecx
-; ILP-NEXT:    shrdq %cl, %r8, %r9
-; ILP-NEXT:    testb $64, %r10b
-; ILP-NEXT:    cmovneq %r8, %r9
+; ILP-NEXT:    subb %sil, %r11b
+; ILP-NEXT:    movl %r11d, %ecx
+; ILP-NEXT:    shrdq %cl, %r8, %r10
 ; ILP-NEXT:    testb $64, %r11b
+; ILP-NEXT:    cmovneq %r8, %r10
+; ILP-NEXT:    testb $64, %r9b
 ; ILP-NEXT:    cmovneq %rdi, %rbx
 ; ILP-NEXT:    cmovneq %r8, %rdi
 ; ILP-NEXT:    testb %sil, %sil
@@ -52,7 +51,7 @@
 ; ILP-NEXT:    cmovnsq %r8, %rbx
 ; ILP-NEXT:    cmoveq %r8, %rbx
 ; ILP-NEXT:    movq %rbx, 24(%rax)
-; ILP-NEXT:    cmovnsq %r9, %rdi
+; ILP-NEXT:    cmovnsq %r10, %rdi
 ; ILP-NEXT:    cmoveq %r8, %rdi
 ; ILP-NEXT:    movq %rdi, 16(%rax)
 ; ILP-NEXT:    popq %rbx
@@ -76,7 +75,7 @@
 ; HYBRID-NEXT:    xorl %r10d, %r10d
 ; HYBRID-NEXT:    movl %esi, %ecx
 ; HYBRID-NEXT:    shldq %cl, %r11, %r10
-; HYBRID-NEXT:    addb $-128, %cl
+; HYBRID-NEXT:    leal -128(%rsi), %ecx
 ; HYBRID-NEXT:    xorl %edi, %edi
 ; HYBRID-NEXT:    shldq %cl, %r11, %rdi
 ; HYBRID-NEXT:    movl $1, %edx
@@ -119,7 +118,7 @@
 ; BURR-NEXT:    xorl %r10d, %r10d
 ; BURR-NEXT:    movl %esi, %ecx
 ; BURR-NEXT:    shldq %cl, %r11, %r10
-; BURR-NEXT:    addb $-128, %cl
+; BURR-NEXT:    leal -128(%rsi), %ecx
 ; BURR-NEXT:    xorl %edi, %edi
 ; BURR-NEXT:    shldq %cl, %r11, %rdi
 ; BURR-NEXT:    movl $1, %edx
@@ -160,8 +159,7 @@
 ; SRC-NEXT:    shrdq %cl, %r8, %r10
 ; SRC-NEXT:    testb $64, %cl
 ; SRC-NEXT:    cmovneq %r8, %r10
-; SRC-NEXT:    movl %esi, %r9d
-; SRC-NEXT:    addb $-128, %r9b
+; SRC-NEXT:    leal -128(%rsi), %r9d
 ; SRC-NEXT:    xorl %edx, %edx
 ; SRC-NEXT:    movl %r9d, %ecx
 ; SRC-NEXT:    shldq %cl, %rdi, %rdx
@@ -215,13 +213,12 @@
 ; LIN-NEXT:    cmovneq %rdx, %rdi
 ; LIN-NEXT:    cmovsq %r9, %rdi
 ; LIN-NEXT:    movq %rdi, 8(%rax)
-; LIN-NEXT:    movl %esi, %edx
-; LIN-NEXT:    addb $-128, %dl
-; LIN-NEXT:    movl $1, %r10d
-; LIN-NEXT:    movl %edx, %ecx
-; LIN-NEXT:    shlq %cl, %r10
-; LIN-NEXT:    testb $64, %dl
-; LIN-NEXT:    movq %r10, %rdi
+; LIN-NEXT:    leal -128(%rsi), %r10d
+; LIN-NEXT:    movl $1, %edx
+; LIN-NEXT:    movl %r10d, %ecx
+; LIN-NEXT:    shlq %cl, %rdx
+; LIN-NEXT:    testb $64, %r10b
+; LIN-NEXT:    movq %rdx, %rdi
 ; LIN-NEXT:    cmovneq %r9, %rdi
 ; LIN-NEXT:    movb $-128, %cl
 ; LIN-NEXT:    subb %sil, %cl
@@ -233,9 +230,9 @@
 ; LIN-NEXT:    cmoveq %r9, %rsi
 ; LIN-NEXT:    movq %rsi, 16(%rax)
 ; LIN-NEXT:    xorl %esi, %esi
-; LIN-NEXT:    movl %edx, %ecx
+; LIN-NEXT:    movl %r10d, %ecx
 ; LIN-NEXT:    shldq %cl, %r8, %rsi
-; LIN-NEXT:    cmovneq %r10, %rsi
+; LIN-NEXT:    cmovneq %rdx, %rsi
 ; LIN-NEXT:    cmovnsq %r9, %rsi
 ; LIN-NEXT:    cmoveq %r9, %rsi
 ; LIN-NEXT:    movq %rsi, 24(%rax)
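
For reference, a minimal standalone reproducer (not part of the patch; the file name,
function name, and llc invocation below are illustrative assumptions). The before/after
assembly is copied from the updated test_add_i8 check lines in
test/CodeGen/X86/GlobalISel/add-scalar.ll and shows what the two-address pass can now
do with an 8-bit add on a 64-bit target: instead of tying an input to the result and
copying, it forms a 32-bit LEA over the promoted inputs and extracts the low byte.

; add8.ll -- assumed invocation: llc -mtriple=x86_64-linux-gnu -global-isel < add8.ll
define i8 @add8(i8 %arg1, i8 %arg2) {
  ; The add selects to ADD8rr, a two-address instruction whose first source
  ; is tied to the destination.
  %res = add i8 %arg1, %arg2
  ret i8 %res
}

; Before this change:                 After this change:
;   movl %esi, %eax                     # kill: def $edi killed $edi def $rdi
;   addb %dil, %al                      # kill: def $esi killed $esi def $rsi
;                                       leal (%rsi,%rdi), %eax
; The LEA form avoids the copy into %eax and does not tie either input to the
; result register, which is what the other updated tests check for as well.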