diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2050,6 +2050,20 @@
     llvm_unreachable("Target does not implement this");
   }
 
+  /// If insert/extracts of size \p MemBits require a new register, return the
+  /// register class.
+  virtual const TargetRegisterClass *
+  spill2RegInsertOrExtractRequiresNewReg(unsigned MemBits,
+                                         const TargetRegisterInfo *TRI) const {
+    llvm_unreachable("Target does not implement this");
+  }
+
+  /// \Returns the subreg index for converting \p FromBits to \p ToBits.
+  virtual unsigned spill2RegGetSubregIdx(unsigned FromBits, unsigned ToBits,
+                                         const TargetRegisterInfo *TRI) const {
+    llvm_unreachable("Target does not implement this");
+  }
+
 private:
   mutable std::unique_ptr<MIRFormatter> Formatter;
   unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/lib/CodeGen/Spill2Reg.cpp b/llvm/lib/CodeGen/Spill2Reg.cpp
--- a/llvm/lib/CodeGen/Spill2Reg.cpp
+++ b/llvm/lib/CodeGen/Spill2Reg.cpp
@@ -112,13 +112,22 @@
                            const LiveRegUnits &LRU);
   /// Helper for generateCode(). It replaces stack spills or reloads with movs
   /// to \p LI.reg().
-  void replaceStackWithReg(StackSlotDataEntry &Entry, Register VectorReg);
+  void replaceStackWithReg(StackSlotDataEntry &Entry, Register VectorReg,
+                           int StackSlot);
   /// Updates the live-ins of MBBs after we emit the new spill2reg instructions
   /// and the vector registers become live from register spills to reloads.
   void updateLiveIns(StackSlotDataEntry &Entry, MCRegister VectorReg);
   /// Updates \p LRU with the liveness of physical registers around the spills
   /// and reloads in \p Entry.
   void calculateLiveRegs(StackSlotDataEntry &Entry, LiveRegUnits &LRU);
+  /// Replaces all occurrences of \p OldReg with \p NewReg and sets the subreg
+  /// accordingly. Helper for replaceStackWithReg().
+  void replaceRegAndSubreg(Register OldReg, Register NewReg,
+                           const MachineInstr *S2RMI, unsigned NewSubregIdx);
+  /// If the insert/extracts touch wider registers than \p MemBits, then \p
+  /// OldReg requires a subreg. This is taken care of in this method.
+  void replaceWithSubregIfRequired(uint32_t MemBits, Register OldReg,
+                                   MachineInstr *MI, int StackSlot);
   /// Replace spills to stack with spills to registers (same for reloads).
   void generateCode();
   /// Cleanup data structures once the pass is finished.
@@ -179,14 +188,22 @@
 
   /// The checks for collecting spills and reloads are identical, so we keep
   /// them here in one place. Return true if we should not collect this.
-  auto SkipEntry = [this](int StackSlot, unsigned MemBits,
-                          unsigned Opcode) -> bool {
+  auto SkipEntry = [this](int StackSlot, unsigned MemBits, unsigned Opcode,
+                          const MachineOperand *MO) -> bool {
     // If not a spill/reload stack slot.
     if (!MFI->isSpillSlotObjectIndex(StackSlot))
       return true;
     // Check size in bits.
     if (!TII->isLegalToSpill2Reg(MemBits, Opcode))
       return true;
+    // If we need a subregister, then we need more checks:
+    if (TII->spill2RegInsertOrExtractRequiresNewReg(MemBits, TRI) &&
+        llvm::any_of(MRI->reg_operands(MO->getReg()), [](const auto &MO) {
+          // Skip if any MO is non-renamable or if MO's parent instr is a pseudo
+          return !MO.isRenamable() || MO.getParent()->isPseudo();
+        }))
+      return true;
+
     return false;
   };
 
@@ -209,7 +226,7 @@
         MachineInstr *Spill = &MI;
         auto &Entry = StackSlotData[StackSlot];
         unsigned MemBits = TRI->getRegSizeInBits(MO->getReg(), *MRI);
-        if (SkipEntry(StackSlot, MemBits, MI.getOpcode())) {
+        if (SkipEntry(StackSlot, MemBits, MI.getOpcode(), MO)) {
           Entry.Disable = true;
           continue;
         }
@@ -227,7 +244,7 @@
         MachineInstr *Reload = &MI;
         auto &Entry = StackSlotData[StackSlot];
         unsigned MemBits = TRI->getRegSizeInBits(MO->getReg(), *MRI);
-        if (SkipEntry(StackSlot, MemBits, MI.getOpcode())) {
+        if (SkipEntry(StackSlot, MemBits, MI.getOpcode(), MO)) {
           Entry.Disable = true;
           continue;
         }
@@ -332,9 +349,54 @@
   }
 }
 
+void Spill2Reg::replaceRegAndSubreg(Register OldReg, Register NewReg,
+                                    const MachineInstr *S2RMI,
+                                    unsigned NewSubregIdx) {
+  for (MachineOperand &MO :
+       llvm::make_early_inc_range(MRI->reg_operands(OldReg))) {
+    const MachineInstr *ModifiedI = MO.getParent();
+    assert((MO.isRenamable() || ModifiedI == S2RMI) &&
+           "Should have been discarded earlier");
+    MO.setReg(NewReg);
+    MO.setIsRenamable(true);
+    // The instr emitted by Spill2Reg does not need a subreg, so skip.
+    if (ModifiedI == S2RMI)
+      continue;
+    // Unless we mark a def as 'undef', the rest of the register will be
+    // considered as being read, which creates a use before a def.
+    if (MO.isDef())
+      MO.setIsUndef();
+    MO.setSubReg(NewSubregIdx);
+  }
+}
+
+void Spill2Reg::replaceWithSubregIfRequired(uint32_t MemBits, Register OldReg,
+                                            MachineInstr *MI, int StackSlot) {
+  const TargetRegisterClass *NewRegClass =
+      TII->spill2RegInsertOrExtractRequiresNewReg(MemBits, TRI);
+  // Early return if the target does not support this feature.
+  if (NewRegClass == nullptr)
+    return;
+
+  // If multiple reloads are writing to the same OldReg, then a previous
+  // invocation of this function will have already replaced the register with
+  // the new one. In that case do nothing.
+  uint32_t OldRegBits = TRI->getRegSizeInBits(OldReg, *MRI);
+  uint32_t NewRegBits = TRI->getRegSizeInBits(*NewRegClass);
+  if (NewRegBits == OldRegBits)
+    return;
+
+  // Get the new register and replace instances of OldReg with NewReg.
+  unsigned NewSubregIdx =
+      TII->spill2RegGetSubregIdx(NewRegBits, OldRegBits, TRI);
+  MCRegister NewReg =
+      TRI->getMatchingSuperReg(OldReg.asMCReg(), NewSubregIdx, NewRegClass);
+  replaceRegAndSubreg(OldReg, NewReg, MI, NewSubregIdx);
+}
+
 // Replace stack-based spills/reloads with register-based ones.
 void Spill2Reg::replaceStackWithReg(StackSlotDataEntry &Entry,
-                                    Register VectorReg) {
+                                    Register VectorReg, int StackSlot) {
   for (StackSlotDataEntry::MIData &SpillData : Entry.Spills) {
     MachineInstr *StackSpill = SpillData.MI;
     assert(SpillData.MO->isReg() && "Expected register MO");
@@ -349,6 +411,9 @@
 
     // Spill to stack is no longer needed.
     StackSpill->eraseFromParent();
+    // Set subregister if required.
+    replaceWithSubregIfRequired(SpillData.MemBits, OldReg, SpillToVector,
+                                StackSlot);
     assert(OldReg.isPhysical() && "Otherwise we need to removeInterval()");
   }
 
@@ -366,6 +431,8 @@
 
     // Reload from stack is no longer needed.
     StackReload->eraseFromParent();
+    replaceWithSubregIfRequired(ReloadData.MemBits, OldReg, ReloadFromReg,
+                                StackSlot);
     assert(OldReg.isPhysical() && "Otherwise we need to removeInterval()");
   }
 }
@@ -482,7 +549,8 @@
     updateLiveIns(Entry, *PhysVectorRegOpt);
 
     // Replace stack accesses with register accesses.
-    replaceStackWithReg(Entry, *PhysVectorRegOpt);
+    int StackSlot = Pair.first;
+    replaceStackWithReg(Entry, *PhysVectorRegOpt, StackSlot);
 
     NumSpill2RegInstrs += Entry.Spills.size() + Entry.Reloads.size();
   }
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -684,8 +684,13 @@
                                 int OperationBits, MachineBasicBlock *InsertMBB,
                                 MachineBasicBlock::iterator InsertBeforeIt,
                                 const TargetRegisterInfo *TRI) const override;
-};
+  const TargetRegisterClass *spill2RegInsertOrExtractRequiresNewReg(
+      unsigned MemBits, const TargetRegisterInfo *TRI) const override;
+
+  unsigned spill2RegGetSubregIdx(unsigned FromBits, unsigned ToBits,
+                                 const TargetRegisterInfo *TRI) const override;
+};
 } // namespace llvm
 
 #endif
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -9517,6 +9517,8 @@
   switch (MemBits) {
   case 64:
   case 32:
+  case 16:
+  case 8:
     return true;
   }
   return false;
@@ -9579,6 +9581,9 @@
 
 static unsigned getInsertOrExtractOpcode(unsigned Bits, bool Insert) {
   switch (Bits) {
+  case 8:
+  case 16:
+    return Insert ? X86::MOVDI2PDIrr : X86::MOVPDI2DIrr;
   case 32:
     return Insert ? X86::MOVDI2PDIrr : X86::MOVPDI2DIrr;
   case 64:
@@ -9617,5 +9622,35 @@
   return ExtractMI;
 }
 
+const TargetRegisterClass *X86InstrInfo::spill2RegInsertOrExtractRequiresNewReg(
+    unsigned MemBits, const TargetRegisterInfo *TRI) const {
+  switch (MemBits) {
+  case 8:
+  case 16:
+    return TRI->getRegClass(X86::GR32RegClassID);
+  default:
+    return nullptr;
+  }
+}
+
+unsigned
+X86InstrInfo::spill2RegGetSubregIdx(unsigned FromBits, unsigned ToBits,
+                                    const TargetRegisterInfo *TRI) const {
+  if (FromBits == ToBits)
+    return 0;
+
+  assert(FromBits > ToBits && "From expected to cover To");
+  switch (ToBits) {
+  case 32:
+    return X86::sub_32bit;
+  case 16:
+    return X86::sub_16bit;
+  case 8:
+    return X86::sub_8bit;
+  default:
+    llvm_unreachable("FIXME");
+  }
+}
+
 #define GET_INSTRINFO_HELPERS
 #include "X86GenInstrInfo.inc"
diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll
--- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll
+++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_16bit.ll
@@ -67,8 +67,8 @@
 ; CHECK-NEXT:    .cfi_offset %r14, -32
 ; CHECK-NEXT:    .cfi_offset %r15, -24
 ; CHECK-NEXT:    .cfi_offset %rbp, -16
-; CHECK-NEXT:    movzwl D0(%rip), %eax
-; CHECK-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; CHECK-NEXT:    movw D0(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm3
 ; CHECK-NEXT:    movzwl D1(%rip), %ecx
 ; CHECK-NEXT:    movzwl D2(%rip), %edx
 ; CHECK-NEXT:    movzwl D3(%rip), %esi
@@ -83,18 +83,18 @@
 ; CHECK-NEXT:    movzwl D12(%rip), %r15d
 ; CHECK-NEXT:    movzwl D13(%rip), %r12d
 ; CHECK-NEXT:    movzwl D14(%rip), %r13d
-; CHECK-NEXT:    movzwl D15(%rip), %eax
-; CHECK-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; CHECK-NEXT:    movzwl D16(%rip), %eax
-; CHECK-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; CHECK-NEXT:    movzwl D17(%rip), %eax
-; CHECK-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; CHECK-NEXT:    movzwl D18(%rip), %eax
-; CHECK-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; CHECK-NEXT:    movw D15(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movw D16(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm1
+; CHECK-NEXT:    movw D17(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm4
+; CHECK-NEXT:    movw D18(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm2
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload
-; CHECK-NEXT:    movw %ax, U0(%rip)
+; CHECK-NEXT:    movd %xmm3, %eax
+; CHECK-NEXT:    movw %eax, U0(%rip)
 ; CHECK-NEXT:    movw %cx, U1(%rip)
 ; CHECK-NEXT:    movw %dx, U2(%rip)
 ; CHECK-NEXT:    movw %si, U3(%rip)
@@ -109,14 +109,14 @@
 ; CHECK-NEXT:    movw %r15w, U12(%rip)
 ; CHECK-NEXT:    movw %r12w, U13(%rip)
 ; CHECK-NEXT:    movw %r13w, U14(%rip)
-; CHECK-NEXT:    movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload
-; CHECK-NEXT:    movw %ax, U15(%rip)
-; CHECK-NEXT:    movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload
-; CHECK-NEXT:    movw %ax, U16(%rip)
-; CHECK-NEXT:    movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload
-; CHECK-NEXT:    movw %ax, U17(%rip)
-; CHECK-NEXT:    movzwl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload
-; CHECK-NEXT:    movw %ax, U18(%rip)
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    movw %eax, U15(%rip)
+; CHECK-NEXT:    movd %xmm1, %eax
+; CHECK-NEXT:    movw %eax, U16(%rip)
+; CHECK-NEXT:    movd %xmm4, %eax
+; CHECK-NEXT:    movw %eax, U17(%rip)
+; CHECK-NEXT:    movd %xmm2, %eax
+; CHECK-NEXT:    movw %eax, U18(%rip)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    popq %r12
diff --git a/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll b/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll
--- a/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll
+++ b/llvm/test/CodeGen/X86/spill2reg_end_to_end_8bit.ll
@@ -67,8 +67,8 @@
 ; CHECK-NEXT:    .cfi_offset %r14, -32
 ; CHECK-NEXT:    .cfi_offset %r15, -24
 ; CHECK-NEXT:    .cfi_offset %rbp, -16
-; CHECK-NEXT:    movb D0(%rip), %al
-; CHECK-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT:    movb D0(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm3
 ; CHECK-NEXT:    movb D1(%rip), %cl
 ; CHECK-NEXT:    movb D2(%rip), %dl
 ; CHECK-NEXT:    movb D3(%rip), %sil
@@ -83,18 +83,18 @@
 ; CHECK-NEXT:    movb D12(%rip), %r15b
 ; CHECK-NEXT:    movb D13(%rip), %r12b
 ; CHECK-NEXT:    movb D14(%rip), %r13b
-; CHECK-NEXT:    movb D15(%rip), %al
-; CHECK-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-NEXT:    movb D16(%rip), %al
-; CHECK-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-NEXT:    movb D17(%rip), %al
-; CHECK-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-NEXT:    movb D18(%rip), %al
-; CHECK-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT:    movb D15(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movb D16(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm1
+; CHECK-NEXT:    movb D17(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm4
+; CHECK-NEXT:    movb D18(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm2
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
-; CHECK-NEXT:    movb %al, U0(%rip)
+; CHECK-NEXT:    movd %xmm3, %eax
+; CHECK-NEXT:    movb %eax, U0(%rip)
 ; CHECK-NEXT:    movb %cl, U1(%rip)
 ; CHECK-NEXT:    movb %dl, U2(%rip)
 ; CHECK-NEXT:    movb %sil, U3(%rip)
@@ -109,14 +109,14 @@
 ; CHECK-NEXT:    movb %r15b, U12(%rip)
 ; CHECK-NEXT:    movb %r12b, U13(%rip)
 ; CHECK-NEXT:    movb %r13b, U14(%rip)
-; CHECK-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
-; CHECK-NEXT:    movb %al, U15(%rip)
-; CHECK-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
-; CHECK-NEXT:    movb %al, U16(%rip)
-; CHECK-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
-; CHECK-NEXT:    movb %al, U17(%rip)
-; CHECK-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
-; CHECK-NEXT:    movb %al, U18(%rip)
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    movb %eax, U15(%rip)
+; CHECK-NEXT:    movd %xmm1, %eax
+; CHECK-NEXT:    movb %eax, U16(%rip)
+; CHECK-NEXT:    movd %xmm4, %eax
+; CHECK-NEXT:    movb %eax, U17(%rip)
+; CHECK-NEXT:    movd %xmm2, %eax
+; CHECK-NEXT:    movb %eax, U18(%rip)
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    popq %r12