diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -673,6 +673,10 @@
                           const TargetRegisterInfo *TRI,
                           const MachineRegisterInfo *MRI) const override;
 
+  llvm::Optional<std::pair<MCRegister, unsigned>>
+  getMovdCompatibleReg(MCRegister OldReg, uint32_t OldRegBits,
+                       const TargetRegisterInfo *TRI) const;
+
   MachineInstr *
   spill2RegInsertToVectorReg(Register DstReg, Register SrcReg,
                              int OperationBits, MachineBasicBlock *MBB,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -9517,6 +9517,8 @@
   switch (MemBits) {
   case 64:
   case 32:
+  case 16:
+  case 8:
     return true;
   }
   return false;
@@ -9579,6 +9581,8 @@
 
 static unsigned getInsertOrExtractOpcode(unsigned Bits, bool Insert) {
   switch (Bits) {
+  case 8:
+  case 16:
   case 32:
     return Insert ? X86::MOVDI2PDIrr : X86::MOVPDI2DIrr;
   case 64:
@@ -9588,6 +9592,42 @@
   }
 }
 
+/// \Returns the subreg index for getting a subregister of \p SubregBits from
+/// a register of \p RegBits.
+static unsigned spill2RegGetSubregIdx(unsigned RegBits, unsigned SubregBits) {
+  assert(RegBits > SubregBits && "RegBits expected to cover SubregBits");
+  switch (SubregBits) {
+  case 32:
+    return X86::sub_32bit;
+  case 16:
+    return X86::sub_16bit;
+  case 8:
+    return X86::sub_8bit;
+  default:
+    llvm_unreachable("FIXME");
+  }
+}
+
+/// If \p OldReg is 8 or 16 bits wide (per \p OldRegBits), \returns the GR32
+/// super-register that contains it together with the subregister index of
+/// \p OldReg within that register. \returns None for any other width.
+llvm::Optional<std::pair<MCRegister, unsigned>>
+X86InstrInfo::getMovdCompatibleReg(MCRegister OldReg, uint32_t OldRegBits,
+                                   const TargetRegisterInfo *TRI) const {
+  if (OldRegBits != 8 && OldRegBits != 16)
+    return None;
+  // The register class of the register that movd can handle.
+  const TargetRegisterClass *NewRegClass =
+      TRI->getRegClass(X86::GR32RegClassID);
+  uint32_t NewRegBits = TRI->getRegSizeInBits(*NewRegClass);
+  unsigned NewSubregIdx = spill2RegGetSubregIdx(NewRegBits, OldRegBits);
+  MCRegister NewReg =
+      TRI->getMatchingSuperReg(OldReg, NewSubregIdx, NewRegClass);
+  // getMatchingSuperReg yields no register when none matches.
+  assert(NewReg != MCRegister::NoRegister && "No GR32 super-register found");
+  return std::make_pair(NewReg, NewSubregIdx);
+}
+
 MachineInstr *X86InstrInfo::spill2RegInsertToVectorReg(
     Register DstReg, Register SrcReg, int OperationBits, MachineBasicBlock *MBB,
     MachineBasicBlock::iterator InsertBeforeIt,
@@ -9596,9 +9636,17 @@
   unsigned InsertOpcode =
       getInsertOrExtractOpcode(OperationBits, true /*insert*/);
   const MCInstrDesc &InsertMCID = get(InsertOpcode);
+  // `movd` does not support 8/16 bit operands. Instead, we use a 32-bit
+  // register with an 8/16 bit subreg:
+  // $xmm0 = MOVDI2PDIrr $eax.sub_8bit
+  auto NewRegAndSubregIdx = getMovdCompatibleReg(SrcReg, OperationBits, TRI);
+  if (NewRegAndSubregIdx)
+    SrcReg = NewRegAndSubregIdx->first;
   MachineInstr *InsertMI =
       BuildMI(*MBB, InsertBeforeIt, DL, InsertMCID, DstReg).addReg(SrcReg);
   InsertMI->addRegisterKilled(DstReg, TRI);
+  if (NewRegAndSubregIdx)
+    InsertMI->getOperand(1).setSubReg(NewRegAndSubregIdx->second);
   return InsertMI;
 }
 
@@ -9610,10 +9658,18 @@
   unsigned ExtractOpcode =
       getInsertOrExtractOpcode(OperationBits, false /*extract*/);
   const MCInstrDesc &ExtractMCID = get(ExtractOpcode);
+  // `movd` does not support 8/16 bit operands. Instead, we use a 32-bit
+  // register with an 8/16 bit subreg:
+  // $eax.sub_8bit = MOVPDI2DIrr $xmm0
+  auto NewRegAndSubregIdx = getMovdCompatibleReg(DstReg, OperationBits, TRI);
+  if (NewRegAndSubregIdx)
+    DstReg = NewRegAndSubregIdx->first;
   MachineInstr *ExtractMI =
       BuildMI(*InsertMBB, InsertBeforeIt, DL, ExtractMCID, DstReg)
           .addReg(SrcReg);
   ExtractMI->addRegisterKilled(DstReg, TRI);
+  if (NewRegAndSubregIdx)
+    ExtractMI->getOperand(0).setSubReg(NewRegAndSubregIdx->second);
   return ExtractMI;
 }
 
diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll
--- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll
+++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll
@@ -67,8 +67,8 @@
 ; CHECK-NEXT:    .cfi_offset %r14, -32
 ; CHECK-NEXT:    .cfi_offset %r15, -24
 ; CHECK-NEXT:    .cfi_offset %rbp, -16
-; CHECK-NEXT:    movzwl D0(%rip), %eax
-; CHECK-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; CHECK-NEXT:    movw D0(%rip), %ax
+; CHECK-NEXT:    movd %eax, %xmm3
 ; CHECK-NEXT:    movzwl D1(%rip), %ecx
 ; CHECK-NEXT:    movzwl D2(%rip), %edx
 ; CHECK-NEXT:    movzwl D3(%rip), %esi
@@ -83,17 +83,17 @@
 ; CHECK-NEXT:    movzwl D12(%rip), %r15d
 ; CHECK-NEXT:    movzwl D13(%rip), %r12d
 ; CHECK-NEXT:    movzwl D14(%rip), %r13d
-; CHECK-NEXT:    movzwl D15(%rip), %eax
-; CHECK-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; CHECK-NEXT:    movzwl D16(%rip), %eax
-; CHECK-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; CHECK-NEXT:    movzwl D17(%rip), %eax
-; CHECK-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; CHECK-NEXT:    movzwl D18(%rip), %eax
-; CHECK-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; CHECK-NEXT:    movw D15(%rip), %ax
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movw D16(%rip), %ax
+; CHECK-NEXT:    movd %eax, %xmm1
+; CHECK-NEXT:    movw D17(%rip), %ax
+; CHECK-NEXT:    movd %eax, %xmm4
+; CHECK-NEXT:    movw D18(%rip), %ax
+; CHECK-NEXT:    movd %eax, %xmm2
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload
+; CHECK-NEXT:    movd %xmm3, %eax
 ; CHECK-NEXT:    movw %ax, U0(%rip)
 ; CHECK-NEXT:    movw %cx, U1(%rip)
 ; CHECK-NEXT:    movw %dx, U2(%rip)
@@ -109,13 +109,13 @@
 ; CHECK-NEXT:    movw %r15w, U12(%rip)
 ; CHECK-NEXT:    movw %r12w, U13(%rip)
 ; CHECK-NEXT:    movw %r13w, U14(%rip)
-; CHECK-NEXT:    movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload
+; CHECK-NEXT:    movd %xmm0, %eax
 ; CHECK-NEXT:    movw %ax, U15(%rip)
-; CHECK-NEXT:    movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload
+; CHECK-NEXT:    movd %xmm1, %eax
 ; CHECK-NEXT:    movw %ax, U16(%rip)
-; CHECK-NEXT:    movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload
+; CHECK-NEXT:    movd %xmm4, %eax
 ; CHECK-NEXT:    movw %ax, U17(%rip)
-; CHECK-NEXT:    movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload
+; CHECK-NEXT:    movd %xmm2, %eax
 ; CHECK-NEXT:    movw %ax, U18(%rip)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll
--- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll
+++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll
@@ -68,7 +68,7 @@
 ; CHECK-NEXT:    .cfi_offset %r15, -24
 ; CHECK-NEXT:    .cfi_offset %rbp, -16
 ; CHECK-NEXT:    movb D0(%rip), %al
-; CHECK-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT:    movd %eax, %xmm3
 ; CHECK-NEXT:    movb D1(%rip), %cl
 ; CHECK-NEXT:    movb D2(%rip), %dl
 ; CHECK-NEXT:    movb D3(%rip), %sil
@@ -84,16 +84,16 @@
 ; CHECK-NEXT:    movb D13(%rip), %r12b
 ; CHECK-NEXT:    movb D14(%rip), %r13b
 ; CHECK-NEXT:    movb D15(%rip), %al
-; CHECK-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT:    movd %eax, %xmm0
 ; CHECK-NEXT:    movb D16(%rip), %al
-; CHECK-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT:    movd %eax, %xmm1
 ; CHECK-NEXT:    movb D17(%rip), %al
-; CHECK-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT:    movd %eax, %xmm4
 ; CHECK-NEXT:    movb D18(%rip), %al
-; CHECK-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT:    movd %eax, %xmm2
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-NEXT:    movd %xmm3, %eax
 ; CHECK-NEXT:    movb %al, U0(%rip)
 ; CHECK-NEXT:    movb %cl, U1(%rip)
 ; CHECK-NEXT:    movb %dl, U2(%rip)
@@ -109,13 +109,13 @@
 ; CHECK-NEXT:    movb %r15b, U12(%rip)
 ; CHECK-NEXT:    movb %r12b, U13(%rip)
 ; CHECK-NEXT:    movb %r13b, U14(%rip)
-; CHECK-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-NEXT:    movd %xmm0, %eax
 ; CHECK-NEXT:    movb %al, U15(%rip)
-; CHECK-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-NEXT:    movd %xmm1, %eax
 ; CHECK-NEXT:    movb %al, U16(%rip)
-; CHECK-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-NEXT:    movd %xmm4, %eax
 ; CHECK-NEXT:    movb %al, U17(%rip)
-; CHECK-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-NEXT:    movd %xmm2, %eax
 ; CHECK-NEXT:    movb %al, U18(%rip)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48