diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -238,6 +238,11 @@
   unsigned getCatchReturnOpcode() const { return CatchRetOpcode; }
   unsigned getReturnOpcode() const { return ReturnOpcode; }
 
+  /// Return true if the instruction configures tile registers. Typically for
+  /// X86 AMX, the 2D tile registers must be configured with the proper shape
+  /// before use. The configuration zeroes the value of all tile registers.
+  virtual bool isConfigRegister(const MachineInstr &MI) const { return false; }
+
   /// Returns the actual stack pointer adjustment made by an instruction
   /// as part of a call sequence. By default, only call frame setup/destroy
   /// instructions adjust the stack, but targets may want to override this
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -41,6 +41,7 @@
 class VirtRegMap;
 class LiveIntervals;
 class LiveInterval;
+class ShapeT;
 
 class TargetRegisterClass {
 public:
@@ -424,6 +425,15 @@
     return false;
   }
 
+  /// Return true if the physical register has an implicit shape. Typically
+  /// for X86 AMX registers, the shape is set up through a separate
+  /// configuration register.
+  virtual bool hasShape(Register PhysReg) const { return false; }
+
+  /// Return the shape of the virtual register. Typically for an X86 AMX
+  /// register, the shape is implied by its defining instruction.
+  virtual ShapeT getTileShape(MachineRegisterInfo *MRI, Register VirtReg) const;
+
   /// Returns true if Reg contains RegUnit.
   bool hasRegUnit(MCRegister Reg, Register RegUnit) const {
     for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units)
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -35,6 +35,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TileShapeInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
@@ -106,6 +107,11 @@
     }
   };
 
+  /// This map records the shape of each physical register that has been
+  /// allocated to a virtual register. It is needed for 2D registers (e.g.,
+  /// X86 AMX registers), whose allocation implies a shape (dimensions).
+  DenseMap<MCPhysReg, ShapeT> PhysRegShapeMap;
+
   using LiveRegMap = SparseSet<LiveReg, identity<unsigned>, uint16_t>;
 
   /// This map contains entries for each virtual register that is currently
   /// available in a physical register.
@@ -198,6 +204,11 @@
       UsedInInstr.erase(*Units);
   }
 
+  /// Return true if the shape of the physical register matches the shape
+  /// of the virtual register.
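+  /// Within one tile configuration, each physical tile register has a single
+  /// shape, so a physical register may only be reused for virtual registers
+  /// of that same shape.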
+  bool isShapeCompatible(Register PhysReg, Register VirtReg);
+  void setPhysRegShape(Register PhysReg, Register VirtReg);
+
   enum : unsigned {
     spillClean = 50,
     spillDirty = 100,
@@ -281,6 +292,7 @@
 
   Register traceCopies(Register VirtReg) const;
   Register traceCopyChain(Register Reg) const;
+  bool ShouldAllocateRegister(const Register Reg) const;
 
   int getStackSpaceFor(Register VirtReg);
   void spill(MachineBasicBlock::iterator Before, Register VirtReg,
              MCPhysReg AssignedReg, bool Kill, bool LiveOut);
@@ -300,6 +312,11 @@
 INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator",
                 false, false)
 
+bool RegAllocFast::ShouldAllocateRegister(const Register Reg) const {
+  const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
+  return ShouldAllocateClass(*TRI, RC);
+}
+
 void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
   for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
     RegUnitStates[*UI] = NewState;
@@ -333,6 +350,31 @@
   return FrameIdx;
 }
 
+bool RegAllocFast::isShapeCompatible(Register PhysReg, Register VirtReg) {
+  // Most registers don't have a shape property. X86 AMX registers are 2D
+  // tile registers and carry a (row, column) shape.
+  if (!TRI->hasShape(PhysReg))
+    return true;
+
+  ShapeT PhysShape = PhysRegShapeMap[PhysReg];
+  // If no shape has been recorded for the physical register yet, there is
+  // nothing to check against.
+  if (!PhysShape.getRow())
+    return true;
+  ShapeT VirtShape = TRI->getTileShape(MRI, VirtReg);
+  return PhysShape == VirtShape;
+}
+
+// Record the shape of the physical register after it is allocated to a
+// virtual register. The physical register must not be reallocated to another
+// virtual register whose shape differs from that of the previously allocated
+// virtual register.
+void RegAllocFast::setPhysRegShape(Register PhysReg, Register VirtReg) {
+  if (!TRI->hasShape(PhysReg))
+    return;
+  ShapeT Shape = TRI->getTileShape(MRI, VirtReg);
+  PhysRegShapeMap[PhysReg] = Shape;
+}
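+
+// A hypothetical allocation trace (shapes taken from the MIR test added
+// below) illustrating the bookkeeping:
+//   %3:tile (8x32)  -> assigned TMM0; PhysRegShapeMap[TMM0] = {8, 32}
+//   %8:tile (32x32) -> TMM0 is not shape compatible; another register is used
+//   ldtilecfg       -> PhysRegShapeMap is cleared; shapes may be re-recorded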
+
 static bool dominates(MachineBasicBlock &MBB,
                       MachineBasicBlock::const_iterator A,
                       MachineBasicBlock::const_iterator B) {
@@ -691,6 +733,9 @@
   assert(PhysReg != 0 && "Trying to assign no register");
   LR.PhysReg = PhysReg;
   setPhysRegState(PhysReg, VirtReg);
+  // Record the shape of the physical register so that it cannot be
+  // reallocated to a virtual register with a different shape.
+  setPhysRegShape(PhysReg, VirtReg);
 
   assignDanglingDebugValues(AtMI, VirtReg, PhysReg);
 }
@@ -748,7 +793,8 @@
 
   // Take hint when possible.
   if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && RC.contains(Hint0) &&
-      !isRegUsedInInstr(Hint0, LookAtPhysRegUses)) {
+      !isRegUsedInInstr(Hint0, LookAtPhysRegUses) &&
+      isShapeCompatible(Hint0, VirtReg)) {
     // Take hint if the register is currently free.
     if (isPhysRegFree(Hint0)) {
       LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
@@ -763,11 +809,11 @@
     Hint0 = Register();
   }
 
-  // Try other hint.
   Register Hint1 = traceCopies(VirtReg);
   if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && RC.contains(Hint1) &&
-      !isRegUsedInInstr(Hint1, LookAtPhysRegUses)) {
+      !isRegUsedInInstr(Hint1, LookAtPhysRegUses) &&
+      isShapeCompatible(Hint1, VirtReg)) {
     // Take hint if the register is currently free.
     if (isPhysRegFree(Hint1)) {
       LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
@@ -791,6 +837,10 @@
       LLVM_DEBUG(dbgs() << "already used in instr.\n");
       continue;
     }
+    if (!isShapeCompatible(PhysReg, VirtReg)) {
+      LLVM_DEBUG(dbgs() << "not shape compatible.\n");
+      continue;
+    }
 
     unsigned Cost = calcSpillCost(PhysReg);
     LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n');
@@ -830,6 +880,8 @@
   assert(MO.isUndef() && "expected undef use");
   Register VirtReg = MO.getReg();
   assert(Register::isVirtualRegister(VirtReg) && "Expected virtreg");
+  if (!ShouldAllocateRegister(VirtReg))
+    return;
 
   LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
   MCPhysReg PhysReg;
@@ -855,6 +907,8 @@
 /// (tied or earlyclobber) that may interfere with preassigned uses.
 void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
                                             Register VirtReg) {
+  if (!ShouldAllocateRegister(VirtReg))
+    return;
   LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
   if (LRI != LiveVirtRegs.end()) {
     MCPhysReg PrevReg = LRI->PhysReg;
@@ -888,6 +942,8 @@
 void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
                                  Register VirtReg, bool LookAtPhysRegUses) {
   assert(VirtReg.isVirtual() && "Not a virtual register");
+  if (!ShouldAllocateRegister(VirtReg))
+    return;
   MachineOperand &MO = MI.getOperand(OpNum);
   LiveRegMap::iterator LRI;
   bool New;
@@ -938,6 +994,8 @@
 void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
                               Register VirtReg) {
   assert(VirtReg.isVirtual() && "Not a virtual register");
+  if (!ShouldAllocateRegister(VirtReg))
+    return;
   MachineOperand &MO = MI.getOperand(OpNum);
   LiveRegMap::iterator LRI;
   bool New;
@@ -1071,6 +1129,8 @@
   assert(RegClassDefCounts.size() == TRI->getNumRegClasses());
 
   if (Reg.isVirtual()) {
+    if (!ShouldAllocateRegister(Reg))
+      return;
     const TargetRegisterClass *OpRC = MRI->getRegClass(Reg);
     for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses();
          RCIdx != RCIdxEnd; ++RCIdx) {
@@ -1130,6 +1190,8 @@
     if (MO.isReg()) {
       Register Reg = MO.getReg();
       if (Reg.isVirtual()) {
+        if (!ShouldAllocateRegister(Reg))
+          continue;
         if (MO.isDef()) {
           HasDef = true;
           HasVRegDef = true;
@@ -1193,7 +1255,7 @@
       }
       if (MO.isDef()) {
-        if (Reg.isVirtual())
+        if (Reg.isVirtual() && ShouldAllocateRegister(Reg))
           DefOperandIndexes.push_back(I);
 
         addRegClassDefCounts(RegClassDefCounts, Reg);
@@ -1283,6 +1345,8 @@
       Register Reg = MO.getReg();
       if (!Reg)
         continue;
+      if (Reg.isVirtual() && !ShouldAllocateRegister(Reg))
+        continue;
       assert(Reg.isPhysical());
       if (MRI->isReserved(Reg))
         continue;
@@ -1329,7 +1393,7 @@
     if (!MO.isReg() || !MO.isUse())
       continue;
     Register Reg = MO.getReg();
-    if (!Reg.isVirtual())
+    if (!Reg.isVirtual() || !ShouldAllocateRegister(Reg))
       continue;
 
     if (MO.isUndef()) {
@@ -1356,7 +1420,7 @@
       if (!MO.isReg() || !MO.isUse())
         continue;
       Register Reg = MO.getReg();
-      if (!Reg.isVirtual())
+      if (!Reg.isVirtual() || !ShouldAllocateRegister(Reg))
         continue;
 
       assert(MO.isUndef() && "Should only have undef virtreg uses left");
@@ -1379,6 +1443,8 @@
     Register Reg = MO.getReg();
     if (!Reg)
       continue;
+    if (Reg.isVirtual() && !ShouldAllocateRegister(Reg))
+      continue;
     assert(Reg.isPhysical() && "should have register assigned");
 
     // We sometimes get odd situations like:
@@ -1408,6 +1474,8 @@
   for (Register Reg : MI.getUsedDebugRegs()) {
     if (!Register::isVirtualRegister(Reg))
       continue;
+    if (!ShouldAllocateRegister(Reg))
+      continue;
 
     // Already spilled to a stackslot?
     int SS = StackSlotForVirtReg[Reg];
@@ -1448,7 +1516,7 @@
         continue;
 
       Register Reg = MO.getReg();
-      if (!Reg.isVirtual())
+      if (!Reg.isVirtual() || !ShouldAllocateRegister(Reg))
         continue;
 
       DenseMap<Register, MCPhysReg>::iterator DI;
@@ -1481,6 +1549,8 @@
     dumpState()
   );
 
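+  // A config instruction (e.g. ldtilecfg) reconfigures and zeroes all tile
+  // registers, so any shapes recorded so far are stale and must be dropped.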
+  if (TII->isConfigRegister(MI))
+    PhysRegShapeMap.clear();
   // Special handling for debug values. Note that they are not allowed to
   // affect codegen of the other instructions in any way.
   if (MI.isDebugValue()) {
@@ -1551,6 +1621,7 @@
   LiveVirtRegs.setUniverse(NumVirtRegs);
   MayLiveAcrossBlocks.clear();
   MayLiveAcrossBlocks.resize(NumVirtRegs);
+  PhysRegShapeMap.clear();
 
   // Loop over all of the basic blocks, eliminating virtual register references
   for (MachineBasicBlock &MBB : MF)
diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
--- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TileShapeInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/Attributes.h"
@@ -592,6 +593,11 @@
   return BestIdx;
 }
 
+ShapeT TargetRegisterInfo::getTileShape(MachineRegisterInfo *MRI,
+                                        Register VirtReg) const {
+  llvm_unreachable("Target register doesn't have a shape");
+}
+
 Register TargetRegisterInfo::lookThruCopyLike(Register SrcReg,
                                               const MachineRegisterInfo *MRI) const {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -134,7 +134,6 @@
   return Op + X86::AddrNumOperands <= MI.getNumOperands() &&
          MI.getOperand(Op + X86::AddrSegmentReg).isReg() && isLeaMem(MI, Op);
 }
-
 class X86InstrInfo final : public X86GenInstrInfo {
   X86Subtarget &Subtarget;
   const X86RegisterInfo RI;
@@ -156,6 +155,12 @@
   ///
   const X86RegisterInfo &getRegisterInfo() const { return RI; }
 
+  bool isConfigRegister(const MachineInstr &MI) const override {
+    if (MI.getOpcode() == X86::PLDTILECFGV || MI.getOpcode() == X86::LDTILECFG)
+      return true;
+    return false;
+  }
+
   /// Returns the stack pointer adjustment that happens inside the frame
   /// setup..destroy sequence (e.g. by pushes, or inside the callee).
   int64_t getFrameAdjustment(const MachineInstr &I) const {
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -48,12 +48,20 @@
   /// variable size stack objects.
   unsigned BasePtr;
 
+  ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
+                      const MachineRegisterInfo *MRI) const;
+
 public:
   explicit X86RegisterInfo(const Triple &TT);
 
   // FIXME: This should be tablegen'd like getDwarfRegNum is
   int getSEHRegNum(unsigned i) const;
 
+  bool hasShape(Register PhysReg) const override;
+
+  ShapeT getTileShape(MachineRegisterInfo *MRI,
+                      Register VirtReg) const override;
+
   /// getMatchingSuperRegClass - Return a subclass of the specified register
   /// class A so that each register in it has a sub-register of the
   /// specified sub-register index which is in the specified register class B.
@@ -120,6 +128,8 @@
   bool isArgumentRegister(const MachineFunction &MF,
                           MCRegister Reg) const override;
 
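+  /// Return true if RC is the X86 AMX tile register class; used to filter
+  /// the registers handled by the separate tile register allocation run.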
+  bool isTileRegisterClass(const TargetRegisterClass *RC) const;
+
   /// Returns true if PhysReg is a fixed register.
   bool isFixedRegister(const MachineFunction &MF,
                        MCRegister PhysReg) const override;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -676,6 +676,10 @@
   return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
 }
 
+bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
+  return RC->getID() == X86::TILERegClassID;
+}
+
 void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
   // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
   // because the calling convention defines the EFLAGS register as NOT
@@ -930,8 +934,30 @@
   return StackReg;
 }
 
-static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
-                           const MachineRegisterInfo *MRI) {
+bool X86RegisterInfo::hasShape(Register PhysReg) const {
+  return PhysReg >= X86::TMM0 && PhysReg <= X86::TMM7;
+}
+
+ShapeT X86RegisterInfo::getTileShape(MachineRegisterInfo *MRI,
+                                     Register VirtReg) const {
+  // All defs of a tile virtual register should share the same shape.
+  MachineInstr &DefMI = *MRI->def_instr_begin(VirtReg);
+  if (DefMI.isCopy()) {
+    Register SrcReg = DefMI.getOperand(1).getReg();
+    ShapeT Shape = getTileShape(MRI, SrcReg);
+    return Shape;
+  }
+  // PHIs are lowered to tile loads in the fast pre-tile-config pass.
+  assert(!DefMI.isPHI() && "Must not be an AMX PHI instruction");
+  assert(DefMI.isPseudo() && "Must be an AMX pseudo instruction");
+  MachineOperand *RowMO = &DefMI.getOperand(1);
+  MachineOperand *ColMO = &DefMI.getOperand(2);
+  ShapeT Shape(RowMO, ColMO, MRI);
+  return Shape;
+}
+
+ShapeT X86RegisterInfo::getTileShape(Register VirtReg, VirtRegMap *VRM,
+                                     const MachineRegisterInfo *MRI) const {
   if (VRM->hasShape(VirtReg))
     return VRM->getShape(VirtReg);
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -57,6 +57,10 @@
 static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
                                cl::desc("Enable the machine combiner pass"),
                                cl::init(true), cl::Hidden);
+static cl::opt<bool>
+    EnableTileRAPass("x86-tile-ra",
+                     cl::desc("Enable the tile register allocation pass"),
+                     cl::init(true), cl::Hidden);
 
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
   // Register the target.
@@ -387,6 +391,7 @@
   void addPreEmitPass2() override;
   void addPreSched2() override;
   bool addPreRewrite() override;
+  bool addRegAssignAndRewriteFast() override;
 
   std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
 };
@@ -607,6 +612,18 @@
   }));
 }
 
+static bool onlyAllocateTileRegisters(const TargetRegisterInfo &TRI,
+                                      const TargetRegisterClass &RC) {
+  return static_cast<const X86RegisterInfo &>(TRI).isTileRegisterClass(&RC);
+}
+
+bool X86PassConfig::addRegAssignAndRewriteFast() {
+  // Allocate AMX registers separately.
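+  // The filtered run below assigns only tile virtual registers; the default
+  // run that follows allocates everything else (hence ClearVirtRegs is false,
+  // keeping the remaining virtual registers alive for that second run).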
+  if (EnableTileRAPass)
+    addPass(createFastRegisterAllocator(onlyAllocateTileRegisters, false));
+  return TargetPassConfig::addRegAssignAndRewriteFast();
+}
+
 bool X86PassConfig::addPostFastRegAllocRewrite() {
   addPass(createX86FastTileConfigPass());
   return true;
diff --git a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll
--- a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll
@@ -55,10 +55,10 @@
 ; AVX512-NEXT:    movb %sil, {{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    movw %cx, {{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    ldtilecfg {{[0-9]+}}(%rsp)
-; AVX512-NEXT:    tileloadd (%r8,%r9), %tmm0
+; AVX512-NEXT:    tileloadd (%r8,%r9), %tmm1
 ; AVX512-NEXT:    movl $64, %r8d
 ; AVX512-NEXT:    movw $8, %si
-; AVX512-NEXT:    tilestored %tmm0, (%rdi,%r8)
+; AVX512-NEXT:    tilestored %tmm1, (%rdi,%r8)
 ; AVX512-NEXT:    movl $buf, %esi
 ; AVX512-NEXT:    movl $32, %edi
 ; AVX512-NEXT:    tileloadd (%rsi,%rdi), %tmm0
@@ -92,10 +92,10 @@
 ; AVX512-NEXT:    movb %sil, {{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    movw %cx, {{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    ldtilecfg {{[0-9]+}}(%rsp)
-; AVX512-NEXT:    tileloadd (%r8,%r9), %tmm0
+; AVX512-NEXT:    tileloadd (%r8,%r9), %tmm1
 ; AVX512-NEXT:    movl $64, %r8d
 ; AVX512-NEXT:    movw $8, %si
-; AVX512-NEXT:    tilestored %tmm0, (%rdi,%r8)
+; AVX512-NEXT:    tilestored %tmm1, (%rdi,%r8)
 ; AVX512-NEXT:    movl $buf2, %esi
 ; AVX512-NEXT:    movl $32, %edi
 ; AVX512-NEXT:    tileloadd (%rsi,%rdi), %tmm0
diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastalloc.mir b/llvm/test/CodeGen/X86/AMX/amx-fastalloc.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/AMX/amx-fastalloc.mir
@@ -0,0 +1,99 @@
+# RUN: llc -O0 -mtriple=x86_64-- -mattr=+amx-int8,+avx512f -o - %s | FileCheck %s
+
+---
+name: foo_no_reuse
+alignment: 16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr64 }
+  - { id: 1, class: gr16 }
+  - { id: 2, class: gr16 }
+  - { id: 3, class: tile }
+  - { id: 4, class: tile }
+  - { id: 5, class: tile }
+  - { id: 6, class: tile }
+  - { id: 7, class: gr64_nosp }
+  - { id: 8, class: tile }
+  - { id: 9, class: tile }
+  - { id: 10, class: tile }
+  - { id: 11, class: tile }
+  - { id: 12, class: vr512 }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment: 4
+stack:
+  - { id: 0, size: 64, alignment: 4 }
+machineFunctionInfo: {}
+body: |
+  bb.0.entry:
+    liveins: $rdi
+
+    %0:gr64 = COPY killed $rdi
+    %1:gr16 = MOV16ri 32
+    %2:gr16 = MOV16ri 8
+    %3:tile = PTILEZEROV %2, %1
+    %4:tile = PTILEZEROV %2, %1
+    %5:tile = PTILEZEROV %2, %1
+    ; CHECK: tdpbssd %tmm4, %tmm3, %tmm0
+    %6:tile = PTDPBSSDV %2, %2, %1, killed %5, killed %4, killed %3
+    %7:gr64_nosp = MOV32ri64 1024
+    PTILESTOREDV %2, %1, %0, 1, %7, 0, $noreg, killed %6
+    %8:tile = PTILEZEROV %1, %1
+    %9:tile = PTILEZEROV %1, %1
+    %10:tile = PTILEZEROV %2, %1
+    ; CHECK: tdpbssd %tmm2, %tmm1, %tmm0
+    %11:tile = PTDPBSSDV %2, %1, %1, killed %10, killed %8, killed %9
+    PTILESTOREDV killed %2, killed %1, killed %0, 1, killed %7, 0, $noreg, killed %11
+    RET 0
+
+...
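+
+# foo_reuse: every tile below has the same 8x32 shape, so the allocator is
+# free to reuse the physical tile registers chosen for the first group.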
+
+---
+name: foo_reuse
+alignment: 16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr64 }
+  - { id: 1, class: gr16 }
+  - { id: 2, class: gr16 }
+  - { id: 3, class: tile }
+  - { id: 4, class: tile }
+  - { id: 5, class: tile }
+  - { id: 6, class: tile }
+  - { id: 7, class: gr64_nosp }
+  - { id: 8, class: tile }
+  - { id: 9, class: tile }
+  - { id: 10, class: tile }
+  - { id: 11, class: tile }
+  - { id: 12, class: vr512 }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment: 4
+stack:
+  - { id: 0, size: 64, alignment: 4 }
+machineFunctionInfo: {}
+body: |
+  bb.0.entry:
+    liveins: $rdi
+
+    %0:gr64 = COPY killed $rdi
+    %1:gr16 = MOV16ri 32
+    %2:gr16 = MOV16ri 8
+    %3:tile = PTILEZEROV %2, %1
+    %4:tile = PTILEZEROV %2, %1
+    %5:tile = PTILEZEROV %2, %1
+    ; CHECK: tdpbssd %tmm2, %tmm1, %tmm0
+    %6:tile = PTDPBSSDV %2, %2, %1, killed %5, killed %4, killed %3
+    %7:gr64_nosp = MOV32ri64 1024
+    PTILESTOREDV %2, %1, %0, 1, %7, 0, $noreg, killed %6
+    %8:tile = PTILEZEROV %2, %1
+    %9:tile = PTILEZEROV %2, %1
+    %10:tile = PTILEZEROV %2, %1
+    ; CHECK: tdpbssd %tmm2, %tmm1, %tmm0
+    %11:tile = PTDPBSSDV %2, %2, %1, killed %10, killed %8, killed %9
+    PTILESTOREDV killed %2, killed %1, killed %0, 1, killed %7, 0, $noreg, killed %11
+    RET 0
+
+...
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -45,6 +45,7 @@
 ; CHECK-NEXT:       Eliminate PHI nodes for register allocation
 ; CHECK-NEXT:       Two-Address instruction pass
 ; CHECK-NEXT:       Fast Register Allocator
+; CHECK-NEXT:       Fast Register Allocator
 ; CHECK-NEXT:       Fast Tile Register Configure
 ; CHECK-NEXT:       X86 Lower Tile Copy
 ; CHECK-NEXT:       Bundle Machine CFG Edges
diff --git a/llvm/test/DebugInfo/MIR/InstrRef/survives-livedebugvars.mir b/llvm/test/DebugInfo/MIR/InstrRef/survives-livedebugvars.mir
--- a/llvm/test/DebugInfo/MIR/InstrRef/survives-livedebugvars.mir
+++ b/llvm/test/DebugInfo/MIR/InstrRef/survives-livedebugvars.mir
@@ -1,5 +1,5 @@
 # RUN: llc -start-after=phi-node-elimination -stop-after=virtregrewriter %s -mtriple=x86_64-unknown-unknown -o - -experimental-debug-variable-locations | FileCheck %s
-# RUN: llc -O0 -start-after=phi-node-elimination -stop-after=regallocfast %s -mtriple=x86_64-unknown-unknown -o - -experimental-debug-variable-locations | FileCheck %s --check-prefix=FASTREG
+# RUN: llc -O0 -start-after=phi-node-elimination -x86-tile-ra=0 -stop-after=regallocfast %s -mtriple=x86_64-unknown-unknown -o - -experimental-debug-variable-locations | FileCheck %s --check-prefix=FASTREG
 #
 # Test that DBG_INSTR_REFs can pass through livedebugvariables to the end of
 # regalloc without problem. Program body copied from