diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h --- a/llvm/include/llvm/CodeGen/TargetPassConfig.h +++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h @@ -345,6 +345,9 @@ // Helper to verify the analysis is really immutable. void setOpt(bool &Opt, bool Val); + /// Return true if register allocator is specified by -regalloc=override. + bool isCustomizedRegAlloc(); + /// Methods with trivial inline returns are convenient points in the common /// codegen pass pipeline where targets may insert passes. Methods with /// out-of-line standard implementations are major CodeGen stages called by diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1407,6 +1407,11 @@ return createTargetRegisterAllocator(Optimized); } +bool TargetPassConfig::isCustomizedRegAlloc() { + return RegAlloc != + (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator; +} + bool TargetPassConfig::addRegAssignAndRewriteFast() { if (RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator && RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&createFastRegisterAllocator) diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -247,8 +247,10 @@ AU.addPreserved(); AU.addRequired(); - if (!ClearVirtRegs) + if (!ClearVirtRegs) { + AU.addPreserved(); AU.addPreserved(); + } MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt --- a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -40,6 +40,7 @@ X86TileConfig.cpp X86FastPreTileConfig.cpp X86FastTileConfig.cpp + X86TileConfigUtils.cpp X86PreTileConfig.cpp X86ExpandPseudo.cpp X86FastISel.cpp diff --git a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp 
b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp --- a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp @@ -85,6 +85,11 @@ return "Fast Tile Register Preconfigure"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + /// Perform tile register configure. bool runOnMachineFunction(MachineFunction &MFunc) override; @@ -662,12 +667,21 @@ TRI = ST->getRegisterInfo(); CfgSS = -1; + // The config may be done by previous pass in opt mode which is more + // aggressive to configure the function only once and never change the + // config. If it has been configured, just bail out. + if (X86FI->hasSingleTileConfig()) + return false; + unsigned NumVirtRegs = MRI->getNumVirtRegs(); // Abandon early if there is no tile register to config. bool HasVirtTileReg = false; for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) { Register VirtReg = Register::index2VirtReg(I); - if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) { + const TargetRegisterClass *RC = MRI->getRegClassOrNull(VirtReg); + if (!RC) + continue; + if (RC->getID() == X86::TILERegClassID) { HasVirtTileReg = true; break; } diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp --- a/llvm/lib/Target/X86/X86FastTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp @@ -22,6 +22,7 @@ #include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" +#include "X86TileConfigUtils.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -42,11 +43,8 @@ MachineFunction *MF = nullptr; const TargetInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; - const TargetRegisterInfo *TRI = nullptr; X86MachineFunctionInfo *X86FI = nullptr; - bool configBasicBlock(MachineBasicBlock &MBB); - public: X86FastTileConfig() : 
MachineFunctionPass(ID) {} @@ -80,104 +78,26 @@ INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE, "Fast Tile Register Configure", false, false) -static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) { - // There is no phi instruction after register allocation. - assert(MI.isPHI() == false); - // The instruction must have 3 operands: tile def, row, col. - // It should be AMX pseudo instruction that have shape operand. - if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 || - !MI.isPseudo()) - return false; - MachineOperand &MO = MI.getOperand(0); - - if (MO.isReg()) { - Register Reg = MO.getReg(); - // FIXME it may be used after Greedy RA and the physical - // register is not rewritten yet. - if (Reg.isVirtual() && - MRI->getRegClass(Reg)->getID() == X86::TILERegClassID) - return true; - if (Reg >= X86::TMM0 && Reg <= X86::TMM7) - return true; - } - - return false; -} - -// PreTileConfig should configure the tile registers based on basic -// block. -bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) { - bool Change = false; - SmallVector, 6> ShapeInfos; - for (MachineInstr &MI : reverse(MBB)) { - if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV) - continue; - // AMX instructions that define tile register. - if (MI.getOpcode() != X86::PLDTILECFGV) { - MachineOperand &Row = MI.getOperand(1); - MachineOperand &Col = MI.getOperand(2); - unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0; - ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)}); - } else { // PLDTILECFGV - // Rewrite the shape information to memory. Stack slot should have - // been initialized to zero in pre config. - int SS = MI.getOperand(0).getIndex(); // tile config stack slot. - for (auto &ShapeInfo : ShapeInfos) { - DebugLoc DL; - unsigned TMMIdx = ShapeInfo.first; - Register RowReg = ShapeInfo.second.getRow()->getReg(); - Register ColReg = ShapeInfo.second.getCol()->getReg(); - // Here is the data format for the tile config. 
- // 0 palette - // 1 start_row - // 2-15 reserved, must be zero - // 16-17 tile0.colsb Tile 0 bytes per row. - // 18-19 tile1.colsb Tile 1 bytes per row. - // 20-21 tile2.colsb Tile 2 bytes per row. - // ... (sequence continues) - // 30-31 tile7.colsb Tile 7 bytes per row. - // 32-47 reserved, must be zero - // 48 tile0.rows Tile 0 rows. - // 49 tile1.rows Tile 1 rows. - // 50 tile2.rows Tile 2 rows. - // ... (sequence continues) - // 55 tile7.rows Tile 7 rows. - // 56-63 reserved, must be zero - int RowOffset = 48 + TMMIdx; - int ColOffset = 16 + TMMIdx * 2; - - MachineInstrBuilder StoreRow = - BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr)); - addFrameReference(StoreRow, SS, RowOffset) - .addReg(RowReg, 0, X86::sub_8bit); - - MachineInstrBuilder StoreCol = - BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr)); - addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg); - } - ShapeInfos.clear(); - Change = true; - } - } - - if (Change) - X86FI->setHasVirtualTileReg(true); - - return Change; -} - bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) { MF = &MFunc; MRI = &MFunc.getRegInfo(); - const TargetSubtargetInfo *ST = &MFunc.getSubtarget(); - TRI = ST->getRegisterInfo(); TII = MFunc.getSubtarget().getInstrInfo(); X86FI = MFunc.getInfo(); bool Change = false; + SmallVector NewInstrs; + + // The config may be done by signle config pass in opt mode which is more + // aggressive to configure the function only once and never change the + // config. In that case just bail out. 
+ if (X86FI->hasSingleTileConfig()) + return false; // Loop over all of the basic blocks, eliminating virtual register references for (MachineBasicBlock &MBB : MFunc) - Change |= configBasicBlock(MBB); + Change |= amx::configBasicBlock(MBB, MRI, TII, NewInstrs); + + if (Change) + X86FI->setHasVirtualTileReg(true); return Change; } diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h --- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -117,6 +117,12 @@ /// determine if we should insert tilerelease in frame lowering. bool HasVirtualTileReg = false; + /// True if this function was configured successfully in a single config, + /// which means the config never changes in the function. In opt mode the + /// compiler tries to configure tile registers only once, but it may fail. + /// If the compiler succeeds with the single config, it sets this flag. + bool HasSingleTileConfig = false; + Optional SwiftAsyncContextFrameIdx; // Preallocated fields are only used during isel. 
@@ -216,6 +222,9 @@ bool hasVirtualTileReg() const { return HasVirtualTileReg; } void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; } + bool hasSingleTileConfig() const { return HasSingleTileConfig; } + void setHasSingleTileConfig(bool v) { HasSingleTileConfig = v; } + Optional getSwiftAsyncContextFrameIdx() const { return SwiftAsyncContextFrameIdx; } diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp --- a/llvm/lib/Target/X86/X86PreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp @@ -42,15 +42,6 @@ #define DEBUG_TYPE "tile-pre-config" -static void emitErrorMsg(MachineFunction &MF) { - SmallString<32> Str; - Twine ErrorMsg = - MF.getName() + - ": Failed to config tile register, please define the shape earlier"; - LLVMContext &Context = MF.getMMI().getModule()->getContext(); - Context.emitError(ErrorMsg); -} - namespace { struct MIRef { @@ -310,15 +301,12 @@ // TODO: We can hoist shapes across BBs here. if (BBVisitedInfo[I.first].HasAMXRegLiveIn) { // We are not able to config tile registers since the shape to config - // is not defined yet. Emit error message and continue. The function - // would not config tile registers. - emitErrorMsg(MF); + // is not defined yet. return false; } if (BBVisitedInfo[I.first].FirstAMX && BBVisitedInfo[I.first].FirstAMX < I.second.back() && !hoistShapesInBB(I.first, I.second)) { - emitErrorMsg(MF); return false; } WorkList.push_back(I.first); @@ -405,6 +393,9 @@ } // Fill in the palette first. addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), SS).addImm(1); + // Set the HasSingleTileConfig true, so that fast config pass would bail out + // when this flag is set. 
+ X86FI->setHasSingleTileConfig(true); return true; } diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DataLayout.h" @@ -389,8 +390,8 @@ void addPreEmitPass() override; void addPreEmitPass2() override; void addPreSched2() override; - bool addPreRewrite() override; bool addRegAssignAndRewriteFast() override; + bool addRegAssignAndRewriteOptimized() override; std::unique_ptr getCSEConfig() const override; }; @@ -515,8 +516,7 @@ if (getOptLevel() != CodeGenOpt::None) addPass(createX86PreTileConfigPass()); - else - addPass(createX86FastPreTileConfigPass()); + addPass(createX86FastPreTileConfigPass()); } void X86PassConfig::addMachineSSAOptimization() { @@ -619,15 +619,26 @@ bool X86PassConfig::addRegAssignAndRewriteFast() { // Allocate AMX registers separately. if (EnableTileRAPass) { + // Allocate tile registers first so that the other registers are still + // virtual. This helps: a) spilling a tile register creates GPR virtual + // registers; b) the shape is still in a virtual register, which avoids + // handling split/spill cases when filling in the shape information in + // the tile config pass. addPass(createFastRegisterAllocator(onlyAllocateTileRegisters, false)); addPass(createX86FastTileConfigPass()); } return TargetPassConfig::addRegAssignAndRewriteFast(); } -bool X86PassConfig::addPreRewrite() { - addPass(createX86TileConfigPass()); - return true; +bool X86PassConfig::addRegAssignAndRewriteOptimized() { + // Don't support tile RA when the RA is overridden by command line "-regalloc". + if (!isCustomizedRegAlloc() && EnableTileRAPass) { + // Allocate tile registers first. 
+ addPass(createGreedyRegisterAllocator(onlyAllocateTileRegisters)); + addPass(createVirtRegRewriter(false)); + addPass(createX86TileConfigPass()); + } + return TargetPassConfig::addRegAssignAndRewriteOptimized(); } std::unique_ptr X86PassConfig::getCSEConfig() const { diff --git a/llvm/lib/Target/X86/X86TileConfig.cpp b/llvm/lib/Target/X86/X86TileConfig.cpp --- a/llvm/lib/Target/X86/X86TileConfig.cpp +++ b/llvm/lib/Target/X86/X86TileConfig.cpp @@ -22,6 +22,7 @@ #include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" +#include "X86TileConfigUtils.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -83,7 +84,19 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); LiveIntervals &LIS = getAnalysis(); VirtRegMap &VRM = getAnalysis(); + X86MachineFunctionInfo *X86FI = MF.getInfo(); + + if (!X86FI->hasSingleTileConfig()) { + bool Change = false; + SmallVector NewInstrs; + for (MachineBasicBlock &MBB : MF) + Change |= amx::configBasicBlock(MBB, &MRI, TII, NewInstrs); + for (auto *NewMI : NewInstrs) + LIS.InsertMachineInstrInMaps(*NewMI); + return Change; + } + // single config if (VRM.isShapeMapEmpty()) return false; @@ -119,10 +132,12 @@ SmallVector Phys2Virt(AMXRegNum, 0); for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { Register VirtReg = Register::index2VirtReg(I); - if (MRI.reg_nodbg_empty(VirtReg)) - continue; + // if (MRI.reg_nodbg_empty(VirtReg)) + // continue; if (MRI.getRegClass(VirtReg)->getID() != X86::TILERegClassID) continue; + if (VRM.getPhys(VirtReg) == VirtRegMap::NO_PHYS_REG) + continue; unsigned Index = VRM.getPhys(VirtReg) - X86::TMM0; if (!Phys2Virt[Index]) Phys2Virt[Index] = VirtReg; diff --git a/llvm/lib/Target/X86/X86TileConfigUtils.h b/llvm/lib/Target/X86/X86TileConfigUtils.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/X86/X86TileConfigUtils.h @@ -0,0 +1,32 @@ +//===------ X86TileConfigUtils.cpp - utility for 
tile config---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86RegisterInfo.h" +#include "X86Subtarget.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +using namespace llvm; + +namespace amx { + +bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI); + +// The method configure the tile registers based on basic block. It +// collects the shape information of each physical tile register and +// store the shape in the stack slot that is allocated for load config +// to tile config register. +bool configBasicBlock(MachineBasicBlock &MBB, MachineRegisterInfo *MRI, + const TargetInstrInfo *TII, + SmallVectorImpl &NewInsts); + +} // namespace amx diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86TileConfigUtils.cpp copy from llvm/lib/Target/X86/X86FastTileConfig.cpp copy to llvm/lib/Target/X86/X86TileConfigUtils.cpp --- a/llvm/lib/Target/X86/X86FastTileConfig.cpp +++ b/llvm/lib/Target/X86/X86TileConfigUtils.cpp @@ -17,70 +17,21 @@ // //===----------------------------------------------------------------------===// +#include "X86TileConfigUtils.h" #include "X86.h" #include "X86InstrBuilder.h" -#include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include 
"llvm/InitializePasses.h" using namespace llvm; -#define DEBUG_TYPE "fasttileconfig" +namespace amx { -namespace { - -class X86FastTileConfig : public MachineFunctionPass { - // context - MachineFunction *MF = nullptr; - const TargetInstrInfo *TII = nullptr; - MachineRegisterInfo *MRI = nullptr; - const TargetRegisterInfo *TRI = nullptr; - X86MachineFunctionInfo *X86FI = nullptr; - - bool configBasicBlock(MachineBasicBlock &MBB); - -public: - X86FastTileConfig() : MachineFunctionPass(ID) {} - - /// Return the pass name. - StringRef getPassName() const override { - return "Fast Tile Register Configure"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - /// Perform register allocation. - bool runOnMachineFunction(MachineFunction &MFunc) override; - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoPHIs); - } - - static char ID; -}; - -} // end anonymous namespace - -char X86FastTileConfig::ID = 0; - -INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE, - "Fast Tile Register Configure", false, false) -INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE, - "Fast Tile Register Configure", false, false) - -static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) { +bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) { // There is no phi instruction after register allocation. assert(MI.isPHI() == false); // The instruction must have 3 operands: tile def, row, col. @@ -106,7 +57,9 @@ // PreTileConfig should configure the tile registers based on basic // block. 
-bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) { +bool configBasicBlock(MachineBasicBlock &MBB, MachineRegisterInfo *MRI, + const TargetInstrInfo *TII, + SmallVectorImpl &NewInstrs) { bool Change = false; SmallVector, 6> ShapeInfos; for (MachineInstr &MI : reverse(MBB)) { @@ -150,38 +103,19 @@ BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr)); addFrameReference(StoreRow, SS, RowOffset) .addReg(RowReg, 0, X86::sub_8bit); + NewInstrs.push_back(StoreRow); MachineInstrBuilder StoreCol = BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr)); addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg); + NewInstrs.push_back(StoreCol); } ShapeInfos.clear(); Change = true; } } - if (Change) - X86FI->setHasVirtualTileReg(true); - return Change; } -bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) { - MF = &MFunc; - MRI = &MFunc.getRegInfo(); - const TargetSubtargetInfo *ST = &MFunc.getSubtarget(); - TRI = ST->getRegisterInfo(); - TII = MFunc.getSubtarget().getInstrInfo(); - X86FI = MFunc.getInfo(); - bool Change = false; - - // Loop over all of the basic blocks, eliminating virtual register references - for (MachineBasicBlock &MBB : MFunc) - Change |= configBasicBlock(MBB); - - return Change; -} - -FunctionPass *llvm::createX86FastTileConfigPass() { - return new X86FastTileConfig(); -} +} // namespace amx diff --git a/llvm/test/CodeGen/Generic/live-debug-label.ll b/llvm/test/CodeGen/Generic/live-debug-label.ll --- a/llvm/test/CodeGen/Generic/live-debug-label.ll +++ b/llvm/test/CodeGen/Generic/live-debug-label.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -stop-after=virtregrewriter -o - | FileCheck %s +; RUN: llc < %s -x86-tile-ra=0 -stop-after=virtregrewriter -o - | FileCheck %s ; ; NVPTX produces a different order of the BBs ; XFAIL: nvptx diff --git a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll --- a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll @@ -479,14 +479,14 
@@ ; CHECK-NEXT: pushq %r12 ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: subq $1088, %rsp # imm = 0x440 -; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: movl %edi, %r15d ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vmovups %zmm0, (%rsp) ; CHECK-NEXT: movb $1, (%rsp) ; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movl $buf, %r14d -; CHECK-NEXT: movl $32, %r15d +; CHECK-NEXT: movl $32, %ebx ; CHECK-NEXT: movw $8, %bp ; CHECK-NEXT: movl $buf+2048, %r12d ; CHECK-NEXT: .p2align 4, 0x90 @@ -494,17 +494,17 @@ ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: callq foo ; CHECK-NEXT: ldtilecfg (%rsp) -; CHECK-NEXT: testl %ebx, %ebx +; CHECK-NEXT: testl %r15d, %r15d ; CHECK-NEXT: jle .LBB3_3 ; CHECK-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 -; CHECK-NEXT: tileloadd (%r14,%r15), %tmm0 +; CHECK-NEXT: tileloadd (%r14,%rbx), %tmm0 ; CHECK-NEXT: movabsq $64, %rax ; CHECK-NEXT: tilestored %tmm0, 64(%rsp,%rax) # 1024-byte Folded Spill ; CHECK-NEXT: callq foo ; CHECK-NEXT: ldtilecfg (%rsp) ; CHECK-NEXT: movabsq $64, %rax ; CHECK-NEXT: tileloadd 64(%rsp,%rax), %tmm0 # 1024-byte Folded Reload -; CHECK-NEXT: tilestored %tmm0, (%r12,%r15) +; CHECK-NEXT: tilestored %tmm0, (%r12,%rbx) ; CHECK-NEXT: callq foo ; CHECK-NEXT: jmp .LBB3_1 ; CHECK-NEXT: .LBB3_3: diff --git a/llvm/test/CodeGen/X86/AMX/amx-config-fallback.ll b/llvm/test/CodeGen/X86/AMX/amx-config-fallback.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/AMX/amx-config-fallback.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f | FileCheck %s + +@buf = dso_local global [1024 x i8] zeroinitializer, align 16 +@buf2 = dso_local global [1024 x i8] zeroinitializer, align 16 + +define dso_local void @foo(i32 %cond, i16 signext %row, i16 signext %col, i16* %ptr) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; 
CHECK-NEXT: subq $8048, %rsp # imm = 0x1F70 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb $1, {{[0-9]+}}(%rsp) +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl $buf, %r11d +; CHECK-NEXT: movl $32, %ebx +; CHECK-NEXT: movw $8, %ax +; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %edx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %ax, {{[0-9]+}}(%rsp) +; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r8 +; CHECK-NEXT: tileloadd (%r11,%rbx), %tmm0 +; CHECK-NEXT: movabsq $64, %rdi +; CHECK-NEXT: tilestored %tmm0, 3968(%rsp,%rdi) # 1024-byte Folded Spill +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r9 +; CHECK-NEXT: tileloadd (%r11,%rbx), %tmm1 +; CHECK-NEXT: movabsq $64, %rdi +; CHECK-NEXT: tilestored %tmm1, 4992(%rsp,%rdi) # 1024-byte Folded Spill +; CHECK-NEXT: movzwl (%rcx), %ecx +; CHECK-NEXT: movb %cl, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %edx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r10 +; CHECK-NEXT: tileloadd (%r11,%rbx), %tmm2 +; CHECK-NEXT: movabsq $64, %rdi +; CHECK-NEXT: tilestored %tmm2, 6016(%rsp,%rdi) # 1024-byte Folded Spill +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: .LBB0_2: # %if.else +; CHECK-NEXT: movl $buf2, %r11d +; CHECK-NEXT: movl $32, %ebx +; CHECK-NEXT: movw $8, %ax +; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %edx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %ax, {{[0-9]+}}(%rsp) +; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r8 +; CHECK-NEXT: tileloadd (%r11,%rbx), %tmm0 +; CHECK-NEXT: movabsq $64, %rdi +; CHECK-NEXT: tilestored %tmm0, 896(%rsp,%rdi) # 1024-byte Folded Spill +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r9 +; CHECK-NEXT: tileloadd (%r11,%rbx), %tmm1 +; CHECK-NEXT: 
movabsq $64, %rdi +; CHECK-NEXT: tilestored %tmm1, 1920(%rsp,%rdi) # 1024-byte Folded Spill +; CHECK-NEXT: movzwl (%rcx), %edi +; CHECK-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %di, {{[0-9]+}}(%rsp) +; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r10 +; CHECK-NEXT: tileloadd (%r11,%rbx), %tmm3 +; CHECK-NEXT: movabsq $64, %rcx +; CHECK-NEXT: tilestored %tmm3, 2944(%rsp,%rcx) # 1024-byte Folded Spill +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: .LBB0_3: # %if.end +; CHECK-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %edx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %cl, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %di, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %edx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %ax, {{[0-9]+}}(%rsp) +; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; CHECK-NEXT: movabsq $64, %rbx +; CHECK-NEXT: tileloadd (%r8,%rbx), %tmm0 +; CHECK-NEXT: movabsq $64, %rbx +; CHECK-NEXT: tileloadd (%r9,%rbx), %tmm1 +; CHECK-NEXT: movabsq $64, %rax +; CHECK-NEXT: tileloadd (%r10,%rax), %tmm4 +; CHECK-NEXT: movw $8, %ax +; CHECK-NEXT: tdpbssd %tmm1, %tmm0, %tmm4 +; CHECK-NEXT: movl $buf, %eax +; CHECK-NEXT: movl $32, %ecx +; CHECK-NEXT: tilestored %tmm4, (%rax,%rcx) +; CHECK-NEXT: addq $8048, %rsp # imm = 0x1F70 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: tilerelease +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %tobool.not = icmp eq i32 %cond, 0 + br i1 %tobool.not, label %if.else, label %if.then + +if.then: ; preds = %entry + %0 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32) + %1 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32) + %r = load i16, i16* %ptr + %2 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %r, i16 %col, i8* 
getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32) + br label %if.end + +if.else: ; preds = %entry + %3 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32) + %4 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32) + %c = load i16, i16* %ptr + %5 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %c, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %a.sroa.1094.0.in = phi x86_amx [ %3, %if.else ], [ %0, %if.then ] + %b.sroa.1069.0.in = phi x86_amx [ %4, %if.else ], [ %1, %if.then ] + %c.sroa.1044.0.in = phi x86_amx [ %5, %if.else ], [ %2, %if.then ] + %6 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col, i16 8, x86_amx %c.sroa.1044.0.in, x86_amx %a.sroa.1094.0.in, x86_amx %b.sroa.1069.0.in) + tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32, x86_amx %6) + ret void +} + +declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64) +declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) +declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx) diff --git a/llvm/test/CodeGen/X86/AMX/amx-error.ll b/llvm/test/CodeGen/X86/AMX/amx-error.ll deleted file mode 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-error.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: not llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile -o /dev/null 2>&1 | FileCheck %s - -@row = dso_local global i16 8, align 2 -@col = dso_local global i16 8, align 2 - -define dso_local void @add() { -entry: - ; CHECK: Failed to config tile register - %t0 = load i16, ptr @row, align 2 - %t1 = call x86_amx 
@llvm.x86.tilezero.internal(i16 %t0, i16 64) - %t2 = load i16, ptr @col, align 2 - %t3 = call x86_amx @llvm.x86.tilezero.internal(i16 16, i16 %t2) - ret void -} - -declare x86_amx @llvm.x86.tilezero.internal(i16, i16) diff --git a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll --- a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll @@ -106,14 +106,14 @@ ; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) ; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, %r14w +; CHECK-NEXT: movw $8, %bp ; CHECK-NEXT: tilezero %tmm0 ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB1_3 ; CHECK-NEXT: # %bb.1: # %loop.header.preheader -; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: movq %rdi, %r14 +; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: movl $32, %r15d ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB1_2: # %loop.header @@ -123,12 +123,12 @@ ; CHECK-NEXT: callq foo ; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; CHECK-NEXT: tilezero %tmm2 -; CHECK-NEXT: tileloadd (%rbx,%r15), %tmm0 -; CHECK-NEXT: tileloadd (%rbx,%r15), %tmm1 +; CHECK-NEXT: tileloadd (%r14,%r15), %tmm0 +; CHECK-NEXT: tileloadd (%r14,%r15), %tmm1 ; CHECK-NEXT: tdpbssd %tmm1, %tmm0, %tmm2 -; CHECK-NEXT: tilestored %tmm2, (%rbx,%r15) -; CHECK-NEXT: incl %ebp -; CHECK-NEXT: cmpw $100, %bp +; CHECK-NEXT: tilestored %tmm2, (%r14,%r15) +; CHECK-NEXT: incl %ebx +; CHECK-NEXT: cmpw $100, %bx ; CHECK-NEXT: jl .LBB1_2 ; CHECK-NEXT: .LBB1_3: # %exit ; CHECK-NEXT: addq $72, %rsp diff --git a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll --- a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll @@ -131,32 +131,32 @@ ; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) ; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; 
CHECK-NEXT: movw $8, %r15w +; CHECK-NEXT: movw $8, %bp ; CHECK-NEXT: tilezero %tmm0 ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB1_3 ; CHECK-NEXT: # %bb.1: # %loop.header.preheader -; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movq %rdi, %r15 ; CHECK-NEXT: movl $32, %r14d -; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB1_2: # %loop.header ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: tilestored %tmm0, (%rbx,%r14) +; CHECK-NEXT: tilestored %tmm0, (%r15,%r14) ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: tilezero %tmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: callq foo ; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; CHECK-NEXT: tilezero %tmm0 -; CHECK-NEXT: tileloadd (%rbx,%r14), %tmm1 -; CHECK-NEXT: tileloadd (%rbx,%r14), %tmm2 +; CHECK-NEXT: tileloadd (%r15,%r14), %tmm1 +; CHECK-NEXT: tileloadd (%r15,%r14), %tmm2 ; CHECK-NEXT: tdpbssd %tmm2, %tmm1, %tmm0 -; CHECK-NEXT: tilestored %tmm0, (%rbx,%r14) +; CHECK-NEXT: tilestored %tmm0, (%r15,%r14) ; CHECK-NEXT: tilezero %tmm0 -; CHECK-NEXT: incl %ebp -; CHECK-NEXT: cmpw $100, %bp +; CHECK-NEXT: incl %ebx +; CHECK-NEXT: cmpw $100, %bx ; CHECK-NEXT: jl .LBB1_2 ; CHECK-NEXT: .LBB1_3: # %exit ; CHECK-NEXT: addq $72, %rsp diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -124,6 +124,7 @@ ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Tile Register Pre-configure +; CHECK-NEXT: Fast Tile Register Preconfigure ; CHECK-NEXT: Detect Dead Lanes ; CHECK-NEXT: Process Implicit Definitions ; CHECK-NEXT: Remove unreachable machine basic blocks @@ -145,7 +146,10 @@ ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Greedy Register Allocator +; CHECK-NEXT: Virtual Register 
Rewriter ; CHECK-NEXT: Tile Register Configure +; CHECK-NEXT: Live Register Matrix +; CHECK-NEXT: Greedy Register Allocator ; CHECK-NEXT: Virtual Register Rewriter ; CHECK-NEXT: Register Allocation Pass Scoring ; CHECK-NEXT: Stack Slot Coloring diff --git a/llvm/test/CodeGen/X86/statepoint-ra.ll b/llvm/test/CodeGen/X86/statepoint-ra.ll --- a/llvm/test/CodeGen/X86/statepoint-ra.ll +++ b/llvm/test/CodeGen/X86/statepoint-ra.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true -pass-remarks-filter=regalloc -pass-remarks-output=%t.yaml -stop-after=greedy -o - < %s 2>&1 | FileCheck %s +; RUN: llc -x86-tile-ra=0 -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true -pass-remarks-filter=regalloc -pass-remarks-output=%t.yaml -stop-after=greedy -o - < %s 2>&1 | FileCheck %s ; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll --- a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll @@ -5,7 +5,7 @@ ; This run is to demonstrate what MIR SSA looks like. ; RUN: llc -max-registers-for-gc-values=4 -stop-after finalize-isel < %s | FileCheck --check-prefix=CHECK-VREG %s ; This run is to demonstrate register allocator work. 
-; RUN: llc -max-registers-for-gc-values=4 -stop-after virtregrewriter < %s | FileCheck --check-prefix=CHECK-PREG %s +; RUN: llc -x86-tile-ra=0 -max-registers-for-gc-values=4 -stop-after virtregrewriter < %s | FileCheck --check-prefix=CHECK-PREG %s target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll --- a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll @@ -1,4 +1,4 @@ -; RUN: llc -max-registers-for-gc-values=4 -stop-after virtregrewriter < %s | FileCheck %s +; RUN: llc -x86-tile-ra=0 -max-registers-for-gc-values=4 -stop-after virtregrewriter < %s | FileCheck %s target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll b/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll --- a/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll @@ -1,5 +1,5 @@ -; RUN: llc -max-registers-for-gc-values=18 -stop-before greedy < %s | FileCheck --check-prefix=CHECK-VREG %s -; RUN: llc -max-registers-for-gc-values=18 -stop-after virtregrewriter < %s | FileCheck --check-prefix=CHECK-PREG %s +; RUN: llc -x86-tile-ra=0 -max-registers-for-gc-values=18 -stop-before greedy < %s | FileCheck --check-prefix=CHECK-VREG %s +; RUN: llc -x86-tile-ra=0 -max-registers-for-gc-values=18 -stop-after virtregrewriter < %s | FileCheck --check-prefix=CHECK-PREG %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" diff --git a/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-folding-tieddef.mir b/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-folding-tieddef.mir --- 
a/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-folding-tieddef.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-folding-tieddef.mir @@ -1,4 +1,4 @@ -# RUN: llc %s -o - -experimental-debug-variable-locations \ +# RUN: llc %s -o - -x86-tile-ra=0 -experimental-debug-variable-locations \ # RUN: -start-before=x86-flags-copy-lowering -stop-after=virtregrewriter \ # RUN: -mtriple x86_64-unknown-unknown \ # RUN: | FileCheck %s diff --git a/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-folding.mir b/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-folding.mir --- a/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-folding.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-folding.mir @@ -1,4 +1,4 @@ -# RUN: llc %s -o - -experimental-debug-variable-locations \ +# RUN: llc %s -o - -x86-tile-ra=0 -experimental-debug-variable-locations \ # RUN: -start-before=x86-flags-copy-lowering -stop-after=virtregrewriter \ # RUN: -mtriple x86_64-unknown-unknown \ # RUN: | FileCheck %s diff --git a/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-load-folding.mir b/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-load-folding.mir --- a/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-load-folding.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-load-folding.mir @@ -1,4 +1,4 @@ -# RUN: llc %s -o - -experimental-debug-variable-locations \ +# RUN: llc %s -o - -x86-tile-ra=0 -experimental-debug-variable-locations \ # RUN: -start-before=phi-node-elimination -stop-after=virtregrewriter \ # RUN: -mtriple x86_64-unknown-unknown \ # RUN: | FileCheck %s diff --git a/llvm/test/DebugInfo/MIR/InstrRef/phi-coalesce-subreg.mir b/llvm/test/DebugInfo/MIR/InstrRef/phi-coalesce-subreg.mir --- a/llvm/test/DebugInfo/MIR/InstrRef/phi-coalesce-subreg.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/phi-coalesce-subreg.mir @@ -3,7 +3,7 @@ # RUN: -start-before=phi-node-elimination \ # RUN: -stop-after=simple-register-coalescing \ # RUN: | FileCheck %s --check-prefix=DOESCOALESCE -# RUN: llc %s -o 
- -mtriple=x86_64-unknown-unknown \ +# RUN: llc %s -o - -x86-tile-ra=0 -mtriple=x86_64-unknown-unknown \ # RUN: -experimental-debug-variable-locations \ # RUN: -start-before=phi-node-elimination \ # RUN: -stop-after=virtregrewriter \ diff --git a/llvm/test/DebugInfo/MIR/InstrRef/phi-on-stack-coalesced.mir b/llvm/test/DebugInfo/MIR/InstrRef/phi-on-stack-coalesced.mir --- a/llvm/test/DebugInfo/MIR/InstrRef/phi-on-stack-coalesced.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/phi-on-stack-coalesced.mir @@ -1,4 +1,4 @@ -# RUN: llc %s -o - -mtriple=x86_64-unknown-unknown \ +# RUN: llc %s -o - -x86-tile-ra=0 -mtriple=x86_64-unknown-unknown \ # RUN: -experimental-debug-variable-locations \ # RUN: -start-before=phi-node-elimination -stop-after=virtregrewriter \ # RUN: | FileCheck %s diff --git a/llvm/test/DebugInfo/MIR/InstrRef/phi-on-stack-coalesced2.mir b/llvm/test/DebugInfo/MIR/InstrRef/phi-on-stack-coalesced2.mir --- a/llvm/test/DebugInfo/MIR/InstrRef/phi-on-stack-coalesced2.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/phi-on-stack-coalesced2.mir @@ -1,4 +1,4 @@ -# RUN: llc %s -o - -mtriple=x86_64-unknown-unknown \ +# RUN: llc %s -o - -x86-tile-ra=0 -mtriple=x86_64-unknown-unknown \ # RUN: -experimental-debug-variable-locations \ # RUN: -start-before=phi-node-elimination -stop-after=virtregrewriter \ # RUN: | FileCheck %s diff --git a/llvm/test/DebugInfo/MIR/InstrRef/survives-livedebugvars.mir b/llvm/test/DebugInfo/MIR/InstrRef/survives-livedebugvars.mir --- a/llvm/test/DebugInfo/MIR/InstrRef/survives-livedebugvars.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/survives-livedebugvars.mir @@ -1,5 +1,5 @@ -# RUN: llc -start-after=phi-node-elimination -stop-after=virtregrewriter %s -mtriple=x86_64-unknown-unknown -o - -experimental-debug-variable-locations | FileCheck %s -# RUN: llc -O0 -start-after=phi-node-elimination -x86-tile-ra=0 -stop-after=regallocfast %s -mtriple=x86_64-unknown-unknown -o - -experimental-debug-variable-locations | FileCheck %s --check-prefix=FASTREG +# RUN: 
llc -x86-tile-ra=0 -start-after=phi-node-elimination -stop-after=virtregrewriter %s -mtriple=x86_64-unknown-unknown -o - -experimental-debug-variable-locations | FileCheck %s +# RUN: llc -O0 -x86-tile-ra=0 -start-after=phi-node-elimination -stop-after=regallocfast %s -mtriple=x86_64-unknown-unknown -o - -experimental-debug-variable-locations | FileCheck %s --check-prefix=FASTREG # # Test that DBG_INSTR_REFs can pass through livedebugvariables to the end of # regalloc without problem. Program body copied from diff --git a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-movements.mir b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-movements.mir --- a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-movements.mir +++ b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-movements.mir @@ -1,4 +1,4 @@ -# RUN: llc -start-after=phi-node-elimination -stop-after=virtregrewriter %s -mtriple=x86_64-unknown-unknown -o - | FileCheck %s +# RUN: llc -x86-tile-ra=0 -start-after=phi-node-elimination -stop-after=virtregrewriter %s -mtriple=x86_64-unknown-unknown -o - | FileCheck %s # # Test that when a livedebugvars interval is split, DBG_VALUE_LISTs are created # with the correct operands and exprs. Force values to be moved around between diff --git a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-stackptr.mir b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-stackptr.mir --- a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-stackptr.mir +++ b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-stackptr.mir @@ -1,4 +1,4 @@ -# RUN: llc -start-after=phi-node-elimination -stop-after=virtregrewriter %s -mtriple=x86_64-unknown-unknown -o - | FileCheck %s +# RUN: llc -x86-tile-ra=0 -start-after=phi-node-elimination -stop-after=virtregrewriter %s -mtriple=x86_64-unknown-unknown -o - | FileCheck %s # # This is a copy of the adjacent "-movements.mir" file, but where one of the # operands to DBG_VALUE_LIST is a stack _pointer_ rather than a spilt value. 
diff --git a/llvm/test/DebugInfo/MIR/X86/empty-inline.mir b/llvm/test/DebugInfo/MIR/X86/empty-inline.mir --- a/llvm/test/DebugInfo/MIR/X86/empty-inline.mir +++ b/llvm/test/DebugInfo/MIR/X86/empty-inline.mir @@ -1,4 +1,4 @@ -# RUN: llc -verify-machineinstrs -start-after=virtregrewriter -filetype=obj -o - %s | llvm-dwarfdump -a - | FileCheck %s +# RUN: llc -x86-tile-ra=0 -verify-machineinstrs -start-after=virtregrewriter -filetype=obj -o - %s | llvm-dwarfdump -a - | FileCheck %s # # This testcase has an implicit def pseudo-instruction with a debug location. # diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg-debugonly.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg-debugonly.mir --- a/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg-debugonly.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg-debugonly.mir @@ -1,4 +1,4 @@ -# RUN: llc -O1 -start-before=greedy -stop-after=virtregrewriter -o /dev/null %s -debug-only=livedebugvars 2>&1 -experimental-debug-variable-locations=false | FileCheck -check-prefix=CHECKDBG %s +# RUN: llc -O1 -x86-tile-ra=0 -start-before=greedy -stop-after=virtregrewriter -o /dev/null %s -debug-only=livedebugvars 2>&1 -experimental-debug-variable-locations=false | FileCheck -check-prefix=CHECKDBG %s # REQUIRES: asserts diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg.mir --- a/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg.mir @@ -1,4 +1,4 @@ -# RUN: llc -O1 -start-before=greedy -stop-after=virtregrewriter -o - %s -experimental-debug-variable-locations=false | FileCheck -check-prefix=CHECKMIR %s +# RUN: llc -O1 -x86-tile-ra=0 -start-before=greedy -stop-after=virtregrewriter -o - %s -experimental-debug-variable-locations=false | FileCheck -check-prefix=CHECKMIR %s # This test case was generated by using the following c program: # extern void 
foo(int, int); diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvars-crossbb-interval.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvars-crossbb-interval.mir --- a/llvm/test/DebugInfo/MIR/X86/livedebugvars-crossbb-interval.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvars-crossbb-interval.mir @@ -1,4 +1,4 @@ -# RUN: llc -start-after=phi-node-elimination -stop-after=virtregrewriter %s -mtriple=x86_64-unknown-unknown -o - -experimental-debug-variable-locations=false | FileCheck %s +# RUN: llc -x86-tile-ra=0 -start-after=phi-node-elimination -stop-after=virtregrewriter %s -mtriple=x86_64-unknown-unknown -o - -experimental-debug-variable-locations=false | FileCheck %s # # Test that when a livedebugvars interval crosses a basic block boundary, # DBG_VALUEs are created in each covered basic block. In the IR below, control diff --git a/llvm/test/DebugInfo/X86/live-debug-vars-discard-invalid.mir b/llvm/test/DebugInfo/X86/live-debug-vars-discard-invalid.mir --- a/llvm/test/DebugInfo/X86/live-debug-vars-discard-invalid.mir +++ b/llvm/test/DebugInfo/X86/live-debug-vars-discard-invalid.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=x86_64-linux-gnu -start-before greedy -stop-after virtregrewriter -o - -verify-machineinstrs %s -experimental-debug-variable-locations=false | FileCheck %s +# RUN: llc -mtriple=x86_64-linux-gnu -x86-tile-ra=0 -start-before greedy -stop-after virtregrewriter -o - -verify-machineinstrs %s -experimental-debug-variable-locations=false | FileCheck %s --- | ; ModuleID = '' diff --git a/llvm/test/DebugInfo/X86/live-debug-vars-intervals.mir b/llvm/test/DebugInfo/X86/live-debug-vars-intervals.mir --- a/llvm/test/DebugInfo/X86/live-debug-vars-intervals.mir +++ b/llvm/test/DebugInfo/X86/live-debug-vars-intervals.mir @@ -13,7 +13,7 @@ # escape(&x); # } -# RUN: llc %s -start-before=machine-scheduler -stop-after=virtregrewriter -o - -experimental-debug-variable-locations=false \ +# RUN: llc %s -x86-tile-ra=0 -start-before=machine-scheduler -stop-after=virtregrewriter 
-o - -experimental-debug-variable-locations=false \ # RUN: | FileCheck %s --implicit-check-not=DBG_VALUE # Verify that DBG_VALUEs with same { Variable, Fragment } but different DIExpressions diff --git a/llvm/test/DebugInfo/X86/live-debug-vars-loc-limit.ll b/llvm/test/DebugInfo/X86/live-debug-vars-loc-limit.ll --- a/llvm/test/DebugInfo/X86/live-debug-vars-loc-limit.ll +++ b/llvm/test/DebugInfo/X86/live-debug-vars-loc-limit.ll @@ -1,4 +1,4 @@ -; RUN: llc --stop-after=virtregrewriter -o - %s | FileCheck %s +; RUN: llc -x86-tile-ra=0 --stop-after=virtregrewriter -o - %s | FileCheck %s ; Check that any debug value with 64+ unique machine location operands is set ; undef by LiveDebugVariables.