Index: lib/Target/X86/X86InstrArithmetic.td
===================================================================
--- lib/Target/X86/X86InstrArithmetic.td
+++ lib/Target/X86/X86InstrArithmetic.td
@@ -913,8 +913,8 @@
   let Defs = [EFLAGS] in {
     let Constraints = "$src1 = $dst" in {
       let isCommutable = CommutableRR in {
-        def NAME#8rr  : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
         let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+          def NAME#8rr  : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
           def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
           def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
           def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
@@ -931,9 +931,9 @@
       def NAME#32rm   : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
       def NAME#64rm   : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
 
-      def NAME#8ri    : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
-
       let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+        def NAME#8ri    : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+
         // NOTE: These are order specific, we want the ri8 forms to be listed
         // first so that they are slightly preferred to the ri forms.
         def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
Index: lib/Target/X86/X86InstrInfo.h
===================================================================
--- lib/Target/X86/X86InstrInfo.h
+++ lib/Target/X86/X86InstrInfo.h
@@ -584,9 +584,9 @@
                        const MachineOperand *&Destination) const override;
 
 private:
-  /// This is a helper for convertToThreeAddress for 16-bit instructions.
+  /// This is a helper for convertToThreeAddress for 8 and 16-bit instructions.
   /// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
-  /// super-register and then truncating back down to a 16-bit sub-register.
+  /// super-register and then truncating back down to a 8/16-bit sub-register.
   MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc,
                                              MachineFunction::iterator &MFI,
                                              MachineInstr &MI,
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -797,6 +797,13 @@
 MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
     unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
     LiveVariables *LV) const {
+  // We handle 8-bit adds and various 16-bit opcodes in the switch below.
+  bool Is16BitOp = !(MIOpc == X86::ADD8rr || MIOpc == X86::ADD8ri);
+  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+  assert((!Is16BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
+              *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
+         "Unexpected type for LEA transform");
+
   // TODO: For a 32-bit target, we need to adjust the LEA variables with
   // something like this:
   //   Opcode = X86::LEA32r;
@@ -807,13 +814,12 @@
   if (!Subtarget.is64Bit())
     return nullptr;
 
-  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
   unsigned Opcode = X86::LEA64_32r;
   unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
   unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
 
   // Build and insert into an implicit UNDEF value. This is OK because
-  // we will be shifting and then extracting the lower 16-bits.
+  // we will be shifting and then extracting the lower 8/16-bits.
   // This has the potential to cause partial register stall. e.g.
   //   movw (%rbp,%rcx,2), %dx
   //   leal -65(%rdx), %esi
@@ -824,11 +830,12 @@
   unsigned Src = MI.getOperand(1).getReg();
   bool IsDead = MI.getOperand(0).isDead();
   bool IsKill = MI.getOperand(1).isKill();
+  unsigned SubReg = Is16BitOp ? X86::sub_16bit : X86::sub_8bit;
   assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
   BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
   MachineInstr *InsMI =
       BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
-          .addReg(InRegLEA, RegState::Define, X86::sub_16bit)
+          .addReg(InRegLEA, RegState::Define, SubReg)
           .addReg(Src, getKillRegState(IsKill));
 
   MachineInstrBuilder MIB =
@@ -847,12 +854,14 @@
   case X86::DEC16r:
     addRegOffset(MIB, InRegLEA, true, -1);
     break;
+  case X86::ADD8ri:
   case X86::ADD16ri:
   case X86::ADD16ri8:
   case X86::ADD16ri_DB:
   case X86::ADD16ri8_DB:
     addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
     break;
+  case X86::ADD8rr:
   case X86::ADD16rr:
   case X86::ADD16rr_DB: {
     unsigned Src2 = MI.getOperand(2).getReg();
@@ -861,7 +870,7 @@
     unsigned InRegLEA2 = 0;
     MachineInstr *InsMI2 = nullptr;
     if (Src == Src2) {
-      // ADD16rr killed %reg1028, %reg1028
+      // ADD8rr/ADD16rr killed %reg1028, %reg1028
       // just a single insert_subreg.
       addRegReg(MIB, InRegLEA, true, InRegLEA, false);
     } else {
@@ -870,10 +879,10 @@
       else
         InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
       // Build and insert into an implicit UNDEF value. This is OK because
-      // we will be shifting and then extracting the lower 16-bits.
+      // we will be shifting and then extracting the lower 8/16-bits.
       BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2);
       InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
-                   .addReg(InRegLEA2, RegState::Define, X86::sub_16bit)
+                   .addReg(InRegLEA2, RegState::Define, SubReg)
                    .addReg(Src2, getKillRegState(IsKill2));
       addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
     }
@@ -887,7 +896,7 @@
   MachineInstr *ExtMI =
       BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
           .addReg(Dest, RegState::Define | getDeadRegState(IsDead))
-          .addReg(OutRegLEA, RegState::Kill, X86::sub_16bit);
+          .addReg(OutRegLEA, RegState::Kill, SubReg);
 
   if (LV) {
     // Update live variables.
@@ -1084,6 +1093,7 @@
       LV->replaceKillInstruction(SrcReg2, MI, *NewMI);
     break;
   }
+  case X86::ADD8rr:
   case X86::ADD16rr:
   case X86::ADD16rr_DB:
     return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
@@ -1119,6 +1129,7 @@
     NewMI = addOffset(MIB, MI.getOperand(2));
     break;
   }
+  case X86::ADD8ri:
   case X86::ADD16ri:
   case X86::ADD16ri8:
   case X86::ADD16ri_DB:
Index: test/CodeGen/X86/GlobalISel/add-scalar.ll
===================================================================
--- test/CodeGen/X86/GlobalISel/add-scalar.ll
+++ test/CodeGen/X86/GlobalISel/add-scalar.ll
@@ -57,8 +57,9 @@
 define i8 @test_add_i8(i8 %arg1, i8 %arg2) {
 ; X64-LABEL: test_add_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    leal (%rsi,%rdi), %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 ;
Index: test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll
===================================================================
--- test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll
+++ test/CodeGen/X86/GlobalISel/shl-scalar-widening.ll
@@ -11,8 +11,9 @@
 ; X64-LABEL: test_shl_i4:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    addb %sil, %cl
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    leal (%rdx,%rsi), %ecx
 ; X64-NEXT:    andb $15, %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shlb %cl, %al
Index: test/CodeGen/X86/GlobalISel/shl-scalar.ll
===================================================================
--- test/CodeGen/X86/GlobalISel/shl-scalar.ll
+++ test/CodeGen/X86/GlobalISel/shl-scalar.ll
@@ -147,8 +147,8 @@
 define i8 @test_shl_i8_imm1(i32 %arg1) {
 ; X64-LABEL: test_shl_i8_imm1:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    addb %al, %al
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rdi), %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %a = trunc i32 %arg1 to i8
Index: test/CodeGen/X86/fixup-bw-copy.ll
===================================================================
--- test/CodeGen/X86/fixup-bw-copy.ll
+++ test/CodeGen/X86/fixup-bw-copy.ll
@@ -43,9 +43,10 @@
 define i8 @test_movb_hreg(i16 %a0) {
 ; X64-LABEL: test_movb_hreg:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shrl $8, %eax
-; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    leal (%rax,%rdi), %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 ;
Index: test/CodeGen/X86/fshr.ll
===================================================================
--- test/CodeGen/X86/fshr.ll
+++ test/CodeGen/X86/fshr.ll
@@ -358,9 +358,9 @@
 ;
 ; X64-LABEL: const_shift_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    shrb $7, %sil
-; X64-NEXT:    addb %al, %al
+; X64-NEXT:    leal (%rdi,%rdi), %eax
 ; X64-NEXT:    orb %sil, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
Index: test/CodeGen/X86/iabs.ll
===================================================================
--- test/CodeGen/X86/iabs.ll
+++ test/CodeGen/X86/iabs.ll
@@ -21,10 +21,10 @@
 ;
 ; X64-LABEL: test_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    movl %edi, %ecx
 ; X64-NEXT:    sarb $7, %cl
-; X64-NEXT:    addb %cl, %al
+; X64-NEXT:    leal (%rdi,%rcx), %eax
 ; X64-NEXT:    xorb %cl, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
Index: test/CodeGen/X86/mul-constant-i8.ll
===================================================================
--- test/CodeGen/X86/mul-constant-i8.ll
+++ test/CodeGen/X86/mul-constant-i8.ll
@@ -14,8 +14,8 @@
 define i8 @test_mul_by_2(i8 %x) {
 ; X64-LABEL: test_mul_by_2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    addb %al, %al
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rdi), %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %m = mul i8 %x, 2
Index: test/CodeGen/X86/popcnt.ll
===================================================================
--- test/CodeGen/X86/popcnt.ll
+++ test/CodeGen/X86/popcnt.ll
@@ -25,6 +25,7 @@
 ;
 ; X64-LABEL: cnt8:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shrb %al
 ; X64-NEXT:    andb $85, %al
@@ -36,8 +37,9 @@
 ; X64-NEXT:    addb %al, %dil
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shrb $4, %al
-; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    leal (%rax,%rdi), %eax
 ; X64-NEXT:    andb $15, %al
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 ;
 ; X32-POPCNT-LABEL: cnt8:
Index: test/CodeGen/X86/pr23664.ll
===================================================================
--- test/CodeGen/X86/pr23664.ll
+++ test/CodeGen/X86/pr23664.ll
@@ -4,8 +4,8 @@
 define i2 @f(i32 %arg) {
 ; CHECK-LABEL: f:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    addb %al, %al
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal (%rdi,%rdi), %eax
 ; CHECK-NEXT:    orb $1, %al
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
Index: test/CodeGen/X86/rotate4.ll
===================================================================
--- test/CodeGen/X86/rotate4.ll
+++ test/CodeGen/X86/rotate4.ll
@@ -642,9 +642,9 @@
 ;
 ; X64-LABEL: rotate_demanded_bits_3:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    addb %cl, %cl
+; X64-NEXT:    leal (%rsi,%rsi), %ecx
 ; X64-NEXT:    andb $30, %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    roll %cl, %eax
Index: test/CodeGen/X86/scheduler-backtracking.ll
===================================================================
--- test/CodeGen/X86/scheduler-backtracking.ll
+++ test/CodeGen/X86/scheduler-backtracking.ll
@@ -19,29 +19,28 @@
 ; ILP-NEXT:    incl %esi
 ; ILP-NEXT:    addb %sil, %sil
 ; ILP-NEXT:    orb $1, %sil
-; ILP-NEXT:    movl $1, %r9d
+; ILP-NEXT:    movl $1, %r10d
 ; ILP-NEXT:    xorl %r14d, %r14d
 ; ILP-NEXT:    movl %esi, %ecx
-; ILP-NEXT:    shldq %cl, %r9, %r14
+; ILP-NEXT:    shldq %cl, %r10, %r14
 ; ILP-NEXT:    movl $1, %edx
 ; ILP-NEXT:    shlq %cl, %rdx
-; ILP-NEXT:    movl %esi, %r11d
-; ILP-NEXT:    addb $-128, %r11b
-; ILP-NEXT:    movb $-128, %r10b
+; ILP-NEXT:    leal -128(%rsi), %r9d
+; ILP-NEXT:    movb $-128, %r11b
 ; ILP-NEXT:    xorl %ebx, %ebx
-; ILP-NEXT:    movl %r11d, %ecx
-; ILP-NEXT:    shldq %cl, %r9, %rbx
+; ILP-NEXT:    movl %r9d, %ecx
+; ILP-NEXT:    shldq %cl, %r10, %rbx
 ; ILP-NEXT:    testb $64, %sil
 ; ILP-NEXT:    cmovneq %rdx, %r14
 ; ILP-NEXT:    cmovneq %r8, %rdx
 ; ILP-NEXT:    movl $1, %edi
 ; ILP-NEXT:    shlq %cl, %rdi
-; ILP-NEXT:    subb %sil, %r10b
-; ILP-NEXT:    movl %r10d, %ecx
-; ILP-NEXT:    shrdq %cl, %r8, %r9
-; ILP-NEXT:    testb $64, %r10b
-; ILP-NEXT:    cmovneq %r8, %r9
+; ILP-NEXT:    subb %sil, %r11b
+; ILP-NEXT:    movl %r11d, %ecx
+; ILP-NEXT:    shrdq %cl, %r8, %r10
 ; ILP-NEXT:    testb $64, %r11b
+; ILP-NEXT:    cmovneq %r8, %r10
+; ILP-NEXT:    testb $64, %r9b
 ; ILP-NEXT:    cmovneq %rdi, %rbx
 ; ILP-NEXT:    cmovneq %r8, %rdi
 ; ILP-NEXT:    testb %sil, %sil
@@ -52,7 +51,7 @@
 ; ILP-NEXT:    cmovnsq %r8, %rbx
 ; ILP-NEXT:    cmoveq %r8, %rbx
 ; ILP-NEXT:    movq %rbx, 24(%rax)
-; ILP-NEXT:    cmovnsq %r9, %rdi
+; ILP-NEXT:    cmovnsq %r10, %rdi
 ; ILP-NEXT:    cmoveq %r8, %rdi
 ; ILP-NEXT:    movq %rdi, 16(%rax)
 ; ILP-NEXT:    popq %rbx
@@ -76,7 +75,7 @@
 ; HYBRID-NEXT:    xorl %r10d, %r10d
 ; HYBRID-NEXT:    movl %esi, %ecx
 ; HYBRID-NEXT:    shldq %cl, %r11, %r10
-; HYBRID-NEXT:    addb $-128, %cl
+; HYBRID-NEXT:    leal -128(%rsi), %ecx
 ; HYBRID-NEXT:    xorl %edi, %edi
 ; HYBRID-NEXT:    shldq %cl, %r11, %rdi
 ; HYBRID-NEXT:    movl $1, %edx
@@ -119,7 +118,7 @@
 ; BURR-NEXT:    xorl %r10d, %r10d
 ; BURR-NEXT:    movl %esi, %ecx
 ; BURR-NEXT:    shldq %cl, %r11, %r10
-; BURR-NEXT:    addb $-128, %cl
+; BURR-NEXT:    leal -128(%rsi), %ecx
 ; BURR-NEXT:    xorl %edi, %edi
 ; BURR-NEXT:    shldq %cl, %r11, %rdi
 ; BURR-NEXT:    movl $1, %edx
@@ -160,8 +159,7 @@
 ; SRC-NEXT:    shrdq %cl, %r8, %r10
 ; SRC-NEXT:    testb $64, %cl
 ; SRC-NEXT:    cmovneq %r8, %r10
-; SRC-NEXT:    movl %esi, %r9d
-; SRC-NEXT:    addb $-128, %r9b
+; SRC-NEXT:    leal -128(%rsi), %r9d
 ; SRC-NEXT:    xorl %edx, %edx
 ; SRC-NEXT:    movl %r9d, %ecx
 ; SRC-NEXT:    shldq %cl, %rdi, %rdx
@@ -215,13 +213,12 @@
 ; LIN-NEXT:    cmovneq %rdx, %rdi
 ; LIN-NEXT:    cmovsq %r9, %rdi
 ; LIN-NEXT:    movq %rdi, 8(%rax)
-; LIN-NEXT:    movl %esi, %edx
-; LIN-NEXT:    addb $-128, %dl
-; LIN-NEXT:    movl $1, %r10d
-; LIN-NEXT:    movl %edx, %ecx
-; LIN-NEXT:    shlq %cl, %r10
-; LIN-NEXT:    testb $64, %dl
-; LIN-NEXT:    movq %r10, %rdi
+; LIN-NEXT:    leal -128(%rsi), %r10d
+; LIN-NEXT:    movl $1, %edx
+; LIN-NEXT:    movl %r10d, %ecx
+; LIN-NEXT:    shlq %cl, %rdx
+; LIN-NEXT:    testb $64, %r10b
+; LIN-NEXT:    movq %rdx, %rdi
 ; LIN-NEXT:    cmovneq %r9, %rdi
 ; LIN-NEXT:    movb $-128, %cl
 ; LIN-NEXT:    subb %sil, %cl
@@ -233,9 +230,9 @@
 ; LIN-NEXT:    cmoveq %r9, %rsi
 ; LIN-NEXT:    movq %rsi, 16(%rax)
 ; LIN-NEXT:    xorl %esi, %esi
-; LIN-NEXT:    movl %edx, %ecx
+; LIN-NEXT:    movl %r10d, %ecx
 ; LIN-NEXT:    shldq %cl, %r8, %rsi
-; LIN-NEXT:    cmovneq %r10, %rsi
+; LIN-NEXT:    cmovneq %rdx, %rsi
 ; LIN-NEXT:    cmovnsq %r9, %rsi
 ; LIN-NEXT:    cmoveq %r9, %rsi
 ; LIN-NEXT:    movq %rsi, 24(%rax)