Index: llvm/lib/Target/X86/X86InstrInfo.h
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.h
+++ llvm/lib/Target/X86/X86InstrInfo.h
@@ -374,6 +374,11 @@
                               int FrameIndex, const TargetRegisterClass *RC,
                               const TargetRegisterInfo *TRI) const override;
 
+  void spillSpecialReg(MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator MI,
+                       unsigned Opc, Register Reg,
+                       int FrameIdx, bool isKill = false) const;
+
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
   /// Check whether the target can fold a load that feeds a subreg operand
Index: llvm/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3550,6 +3550,17 @@
   return None;
 }
 
+static unsigned getLoadStoreOpcodeForFP16s(bool load, const X86Subtarget &STI) {
+  if (STI.hasFP16())
+    return load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
+  if (load)
+    return STI.hasAVX512() ? X86::VMOVSSZrm
+           : STI.hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+  else
+    return STI.hasAVX512() ? X86::VMOVSSZmr
+           : STI.hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr;
+}
+
 static unsigned getLoadStoreRegOpcode(Register Reg,
                                       const TargetRegisterClass *RC,
                                       bool IsStackAligned,
@@ -3599,10 +3610,9 @@
         X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
         X86::VK16PAIRRegClass.hasSubClassEq(RC))
       return load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
-    if ((X86::FR16RegClass.hasSubClassEq(RC) ||
-         X86::FR16XRegClass.hasSubClassEq(RC)) &&
-        STI.hasFP16())
-      return load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
+    if (X86::FR16RegClass.hasSubClassEq(RC) ||
+        X86::FR16XRegClass.hasSubClassEq(RC))
+      return getLoadStoreOpcodeForFP16s(load, STI);
     llvm_unreachable("Unknown 4-byte regclass");
   case 8:
     if (X86::GR64RegClass.hasSubClassEq(RC))
@@ -3679,6 +3689,10 @@
       return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
     else
       return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
+  case 1024:
+    assert(X86::TILERegClass.hasSubClassEq(RC) && "Unknown 1024-byte regclass");
+    assert(STI.hasAMXTILE() && "Using 8*1024-bit register requires AMX-TILE");
+    return load ? X86::TILELOADD : X86::TILESTORED;
   }
 }
 
@@ -3835,6 +3849,50 @@
   return getLoadStoreRegOpcode(DestReg, RC, IsStackAligned, STI, true);
 }
 
+static bool SpecialOpcode(unsigned Opc) {
+  switch (Opc) {
+  default:
+    return false;
+  case X86::TILELOADD:
+  case X86::TILESTORED:
+    return true;
+  }
+}
+
+void X86InstrInfo::spillSpecialReg(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   unsigned Opc, Register Reg,
+                                   int FrameIdx, bool isKill) const {
+  switch (Opc) {
+  default:
+    llvm_unreachable("Unexpected special opcode!");
+  case X86::TILESTORED: {
+    // tilestored %tmm, (%sp, %idx)
+    MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
+    Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+    BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
+    MachineInstr *NewMI =
+        addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
+            .addReg(Reg, getKillRegState(isKill));
+    MachineOperand &MO = NewMI->getOperand(X86::AddrIndexReg);
+    MO.setReg(VirtReg);
+    MO.setIsKill(true);
+    break;
+  }
+  case X86::TILELOADD: {
+    // tileloadd (%sp, %idx), %tmm
+    MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
+    Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
+    BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
+    MachineInstr *NewMI = addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), Reg), FrameIdx);
+    MachineOperand &MO = NewMI->getOperand(1 + X86::AddrIndexReg);
+    MO.setReg(VirtReg);
+    MO.setIsKill(true);
+    break;
+  }
+  }
+}
+
 void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        Register SrcReg, bool isKill, int FrameIdx,
@@ -3842,37 +3900,20 @@
                                        const TargetRegisterInfo *TRI) const {
   const MachineFunction &MF = *MBB.getParent();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
   assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
          "Stack slot too small for store");
-  if (RC->getID() == X86::TILERegClassID) {
-    unsigned Opc = X86::TILESTORED;
-    // tilestored %tmm, (%sp, %idx)
-    Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
-    BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
-    MachineInstr *NewMI =
-        addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
-            .addReg(SrcReg, getKillRegState(isKill));
-    MachineOperand &MO = NewMI->getOperand(2);
-    MO.setReg(VirtReg);
-    MO.setIsKill(true);
-  } else if ((RC->getID() == X86::FR16RegClassID ||
-              RC->getID() == X86::FR16XRegClassID) &&
-             !Subtarget.hasFP16()) {
-    unsigned Opc = Subtarget.hasAVX512() ? X86::VMOVSSZmr
-                   : Subtarget.hasAVX() ? X86::VMOVSSmr
-                                        : X86::MOVSSmr;
-    addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
-        .addReg(SrcReg, getKillRegState(isKill));
-  } else {
-    unsigned Alignment = std::max(TRI->getSpillSize(*RC), 16);
-    bool isAligned =
-        (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
-        (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
-    unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
+
+  unsigned Alignment = std::max(TRI->getSpillSize(*RC), 16);
+  bool isAligned =
+      (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
+      (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
+
+  unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
+  if (SpecialOpcode(Opc))
+    spillSpecialReg(MBB, MI, Opc, SrcReg, FrameIdx, isKill);
+  else
     addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
         .addReg(SrcReg, getKillRegState(isKill));
-  }
 }
 
 void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -3884,35 +3925,17 @@
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
          "Load size exceeds stack slot");
-  if (RC->getID() == X86::TILERegClassID) {
-    unsigned Opc = X86::TILELOADD;
-    // tileloadd (%sp, %idx), %tmm
-    MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
-    Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
-    MachineInstr *NewMI =
-        BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
-    NewMI = addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
-                              FrameIdx);
-    MachineOperand &MO = NewMI->getOperand(3);
-    MO.setReg(VirtReg);
-    MO.setIsKill(true);
-  } else if ((RC->getID() == X86::FR16RegClassID ||
-              RC->getID() == X86::FR16XRegClassID) &&
-             !Subtarget.hasFP16()) {
-    unsigned Opc = Subtarget.hasAVX512() ? X86::VMOVSSZrm
-                   : Subtarget.hasAVX() ? X86::VMOVSSrm
-                                        : X86::MOVSSrm;
-    addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
-                      FrameIdx);
-  } else {
-    unsigned Alignment = std::max(TRI->getSpillSize(*RC), 16);
-    bool isAligned =
-        (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
-        (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
-    unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
+  unsigned Alignment = std::max(TRI->getSpillSize(*RC), 16);
+  bool isAligned =
+      (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
+      (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
+
+  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
+  if (SpecialOpcode(Opc))
+    spillSpecialReg(MBB, MI, Opc, DestReg, FrameIdx);
+  else
     addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
                       FrameIdx);
-  }
 }
 
 bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
Index: llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir
===================================================================
--- llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir
+++ llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=x86_64-- -run-pass=fastpretileconfig -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -run-pass=fastpretileconfig -o - %s | FileCheck %s
 #
 # This case test tile phi is nested accessed, but the its def block is
 # not visited yet.
Index: llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir
===================================================================
--- llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir
+++ llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=x86_64-- -run-pass=fastpretileconfig -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -run-pass=fastpretileconfig -o - %s | FileCheck %s
 #
 # bb.0
 # def %0
Index: llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir
===================================================================
--- llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir
+++ llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=x86_64-- -run-pass=fastpretileconfig -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -run-pass=fastpretileconfig -o - %s | FileCheck %s
 #
 # bb.0
 # def %0