diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -673,6 +673,12 @@
                           const TargetRegisterInfo *TRI,
                           const MachineRegisterInfo *MRI) const override;
 
+  /// \Returns the 32-bit super-register of \p OldReg for use with `movd` if
+  /// \p OldRegBits is 8 or 16, otherwise None.
+  llvm::Optional<MCRegister>
+  getMovdCompatibleReg(MCRegister OldReg, uint32_t OldRegBits,
+                       const TargetRegisterInfo *TRI) const;
+
   MachineInstr *
   spill2RegInsertToVectorReg(Register DstReg, Register SrcReg,
                              int OperationBits, MachineBasicBlock *MBB,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -9517,6 +9517,8 @@
   switch (MemBits) {
   case 64:
   case 32:
+  case 16:
+  case 8:
     return true;
   }
   return false;
@@ -9579,6 +9581,8 @@
 
 static unsigned getInsertOrExtractOpcode(unsigned Bits, bool Insert) {
   switch (Bits) {
+  case 8:
+  case 16:
   case 32:
     return Insert ? X86::MOVDI2PDIrr : X86::MOVPDI2DIrr;
   case 64:
@@ -9588,6 +9592,43 @@
   }
 }
 
+/// \Returns the subreg index for getting a subregister of \p SubregBits from
+/// a register of \p RegBits.
+static unsigned spill2RegGetSubregIdx(unsigned RegBits, unsigned SubregBits) {
+  assert(RegBits > SubregBits && "RegBits expected to cover SubregBits");
+  switch (SubregBits) {
+  case 32:
+    return X86::sub_32bit;
+  case 16:
+    return X86::sub_16bit;
+  case 8:
+    return X86::sub_8bit;
+  default:
+    llvm_unreachable("Unsupported subregister size");
+  }
+}
+
+/// \Returns the 32-bit general-purpose super-register of \p OldReg if
+/// \p OldRegBits is 8 or 16, otherwise None. `movd` has no 8/16-bit form, so
+/// narrow values are moved through the enclosing GR32 register.
+llvm::Optional<MCRegister>
+X86InstrInfo::getMovdCompatibleReg(MCRegister OldReg, uint32_t OldRegBits,
+                                   const TargetRegisterInfo *TRI) const {
+  if (OldRegBits != 8 && OldRegBits != 16)
+    return None;
+  // The register class of the register that movd can handle.
+  const TargetRegisterClass *NewRegClass =
+      TRI->getRegClass(X86::GR32RegClassID);
+  unsigned NewRegBits = TRI->getRegSizeInBits(*NewRegClass);
+  unsigned SubIdx = spill2RegGetSubregIdx(NewRegBits, OldRegBits);
+  MCRegister NewReg = TRI->getMatchingSuperReg(OldReg, SubIdx, NewRegClass);
+  // The lookup yields an invalid register when OldReg is not reachable through
+  // SubIdx (e.g. high-byte registers such as $ah are not sub_8bit of any
+  // GR32). Fail loudly instead of handing an invalid register to `movd`.
+  assert(NewReg.isValid() && "No movd-compatible super-register for OldReg");
+  return NewReg;
+}
+
 MachineInstr *X86InstrInfo::spill2RegInsertToVectorReg(
     Register DstReg, Register SrcReg, int OperationBits, MachineBasicBlock *MBB,
     MachineBasicBlock::iterator InsertBeforeIt,
@@ -9596,6 +9637,12 @@
   unsigned InsertOpcode =
       getInsertOrExtractOpcode(OperationBits, true /*insert*/);
   const MCInstrDesc &InsertMCID = get(InsertOpcode);
+  // `movd` does not support 8/16 bit operands. Instead, we use a 32-bit
+  // register. For example:
+  //   $al = ...
+  //   $xmm0 = MOVDI2PDIrr $eax
+  if (auto NewReg = getMovdCompatibleReg(SrcReg, OperationBits, TRI))
+    SrcReg = *NewReg;
   MachineInstr *InsertMI =
       BuildMI(*MBB, InsertBeforeIt, DL, InsertMCID, DstReg).addReg(SrcReg);
   InsertMI->addRegisterKilled(DstReg, TRI);
@@ -9610,6 +9657,12 @@
   unsigned ExtractOpcode =
       getInsertOrExtractOpcode(OperationBits, false /*extract*/);
   const MCInstrDesc &ExtractMCID = get(ExtractOpcode);
+  // `movd` does not support 8/16 bit operands. Instead, we use a 32-bit
+  // register. For example:
+  //   $eax = MOVPDI2DIrr $xmm0
+  //   ...
= $al + if (auto NewReg = getMovdCompatibleReg(DstReg, OperationBits, TRI)) + DstReg = *NewReg; MachineInstr *ExtractMI = BuildMI(*InsertMBB, InsertBeforeIt, DL, ExtractMCID, DstReg) .addReg(SrcReg); diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll --- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll +++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll @@ -67,8 +67,8 @@ ; CHECK-NEXT: .cfi_offset %r14, -32 ; CHECK-NEXT: .cfi_offset %r15, -24 ; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movzwl D0(%rip), %eax -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movw D0(%rip), %ax +; CHECK-NEXT: movd %eax, %xmm3 ; CHECK-NEXT: movzwl D1(%rip), %ecx ; CHECK-NEXT: movzwl D2(%rip), %edx ; CHECK-NEXT: movzwl D3(%rip), %esi @@ -83,17 +83,17 @@ ; CHECK-NEXT: movzwl D12(%rip), %r15d ; CHECK-NEXT: movzwl D13(%rip), %r12d ; CHECK-NEXT: movzwl D14(%rip), %r13d -; CHECK-NEXT: movzwl D15(%rip), %eax -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: movzwl D16(%rip), %eax -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: movzwl D17(%rip), %eax -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: movzwl D18(%rip), %eax -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill +; CHECK-NEXT: movw D15(%rip), %ax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: movw D16(%rip), %ax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: movw D17(%rip), %ax +; CHECK-NEXT: movd %eax, %xmm4 +; CHECK-NEXT: movw D18(%rip), %ax +; CHECK-NEXT: movd %eax, %xmm2 ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movd %xmm3, %eax ; CHECK-NEXT: movw %ax, U0(%rip) ; CHECK-NEXT: movw %cx, U1(%rip) ; CHECK-NEXT: movw %dx, U2(%rip) @@ -109,13 +109,13 @@ ; CHECK-NEXT: movw %r15w, U12(%rip) ; CHECK-NEXT: movw %r12w, U13(%rip) ; 
CHECK-NEXT: movw %r13w, U14(%rip) -; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movd %xmm0, %eax ; CHECK-NEXT: movw %ax, U15(%rip) -; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movd %xmm1, %eax ; CHECK-NEXT: movw %ax, U16(%rip) -; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movd %xmm4, %eax ; CHECK-NEXT: movw %ax, U17(%rip) -; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload +; CHECK-NEXT: movd %xmm2, %eax ; CHECK-NEXT: movw %ax, U18(%rip) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 48 diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll --- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll +++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll @@ -68,7 +68,7 @@ ; CHECK-NEXT: .cfi_offset %r15, -24 ; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movb D0(%rip), %al -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movd %eax, %xmm3 ; CHECK-NEXT: movb D1(%rip), %cl ; CHECK-NEXT: movb D2(%rip), %dl ; CHECK-NEXT: movb D3(%rip), %sil @@ -84,16 +84,16 @@ ; CHECK-NEXT: movb D13(%rip), %r12b ; CHECK-NEXT: movb D14(%rip), %r13b ; CHECK-NEXT: movb D15(%rip), %al -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movd %eax, %xmm0 ; CHECK-NEXT: movb D16(%rip), %al -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movd %eax, %xmm1 ; CHECK-NEXT: movb D17(%rip), %al -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movd %eax, %xmm4 ; CHECK-NEXT: movb D18(%rip), %al -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movd %eax, %xmm2 ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; CHECK-NEXT: movd %xmm3, %eax ; CHECK-NEXT: movb %al, 
U0(%rip) ; CHECK-NEXT: movb %cl, U1(%rip) ; CHECK-NEXT: movb %dl, U2(%rip) @@ -109,13 +109,13 @@ ; CHECK-NEXT: movb %r15b, U12(%rip) ; CHECK-NEXT: movb %r12b, U13(%rip) ; CHECK-NEXT: movb %r13b, U14(%rip) -; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; CHECK-NEXT: movd %xmm0, %eax ; CHECK-NEXT: movb %al, U15(%rip) -; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; CHECK-NEXT: movd %xmm1, %eax ; CHECK-NEXT: movb %al, U16(%rip) -; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; CHECK-NEXT: movd %xmm4, %eax ; CHECK-NEXT: movb %al, U17(%rip) -; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; CHECK-NEXT: movd %xmm2, %eax ; CHECK-NEXT: movb %al, U18(%rip) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 48 diff --git a/llvm/test/CodeGen/X86/spill2reg_simple_1_16bit.mir b/llvm/test/CodeGen/X86/spill2reg_simple_1_16bit.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/spill2reg_simple_1_16bit.mir @@ -0,0 +1,39 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s + +# Simple test with a single 16-bit spill-reload pair: +# spill stack.0 +# reload stack.0 + +--- | + @D0 = dso_local local_unnamed_addr global i32 0, align 4 + @U0 = dso_local local_unnamed_addr global i32 0, align 4 + define void @func() { ret void } +... 
+--- +name: func +alignment: 16 +tracksRegLiveness: true +tracksDebugUserValues: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: {} +body: | + + bb.0: + ; spill + ; CHECK-LABEL: name: func + ; CHECK: $ax = MOV16rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s16) from @D0) + ; CHECK-NEXT: $xmm0 = MOVDI2PDIrr $eax + ; CHECK-NEXT: $eax = MOVPDI2DIrr $xmm0 + ; CHECK-NEXT: MOV16mr $rip, 1, $noreg, @U0, $noreg, killed renamable $ax :: (store (s16) into @U0) + ; CHECK-NEXT: RET 0 + $ax = MOV16rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s16) from @D0) + MOV16mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $ax :: (store (s16) into %stack.0) + ; reload + $ax = MOV16rm %stack.0, 1, $noreg, 0, $noreg :: (load (s16) from %stack.0) + MOV16mr $rip, 1, $noreg, @U0, $noreg, killed renamable $ax :: (store (s16) into @U0) + RET 0 +... diff --git a/llvm/test/CodeGen/X86/spill2reg_simple_1_8bit.mir b/llvm/test/CodeGen/X86/spill2reg_simple_1_8bit.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/spill2reg_simple_1_8bit.mir @@ -0,0 +1,39 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -enable-spill2reg -mattr=+sse4.1 --run-pass=spill2reg -simplify-mir -spill2reg-mem-instrs=0 -spill2reg-vec-instrs=99999 | FileCheck %s + +# Simple test with a single 8-bit spill-reload pair: +# spill stack.0 +# reload stack.0 + +--- | + @D0 = dso_local local_unnamed_addr global i32 0, align 4 + @U0 = dso_local local_unnamed_addr global i32 0, align 4 + define void @func() { ret void } +... 
+--- +name: func +alignment: 16 +tracksRegLiveness: true +tracksDebugUserValues: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: {} +body: | + + bb.0: + ; spill + ; CHECK-LABEL: name: func + ; CHECK: $al = MOV8rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s8) from @D0) + ; CHECK-NEXT: $xmm0 = MOVDI2PDIrr $eax + ; CHECK-NEXT: $eax = MOVPDI2DIrr $xmm0 + ; CHECK-NEXT: MOV8mr $rip, 1, $noreg, @U0, $noreg, killed renamable $al :: (store (s8) into @U0) + ; CHECK-NEXT: RET 0 + $al = MOV8rm $rip, 1, $noreg, @D0, $noreg :: (dereferenceable load (s8) from @D0) + MOV8mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $al :: (store (s8) into %stack.0) + ; reload + $al = MOV8rm %stack.0, 1, $noreg, 0, $noreg :: (load (s8) from %stack.0) + MOV8mr $rip, 1, $noreg, @U0, $noreg, killed renamable $al :: (store (s8) into @U0) + RET 0 +...