Summary: record AMX tile usage in X86MachineFunctionInfo::HasVirtualTileReg.
X86FastTileConfig sets the flag when it reports a change, and X86PreTileConfig
sets it once CfgNeedInsert is known to be non-empty. X86FrameLowering then
emits TILERELEASE only when the flag is set, instead of scanning the physical
TMM registers for uses. Three tests drop their "tilerelease" CHECK line.

diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp
--- a/llvm/lib/Target/X86/X86FastTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp
@@ -44,6 +44,7 @@
   const TargetRegisterInfo *TRI = nullptr;
   const TargetInstrInfo *TII = nullptr;
   MachineRegisterInfo *MRI = nullptr;
+  X86MachineFunctionInfo *X86FI = nullptr;
 
   MachineInstr *getTileConfigPoint();
   void tileConfig();
@@ -289,6 +290,8 @@
     if (!CFGs.empty())
       Changed = true;
   }
+  if (Changed)
+    X86FI->setHasVirtualTileReg(true);
   return Changed;
 }
 
@@ -298,6 +301,7 @@
   ST = &MFunc.getSubtarget();
   TRI = ST->getRegisterInfo();
   TII = MFunc.getSubtarget().getInstrInfo();
+  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
   return fastTileConfig();
 }
 
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -2219,13 +2219,8 @@
   }
 
   // Emit tilerelease for AMX kernel.
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
-  const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
-  for (unsigned I = 0; I < RC->getNumRegs(); I++)
-    if (!MRI.reg_nodbg_empty(X86::TMM0 + I)) {
-      BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
-      break;
-    }
+  if (X86FI->hasVirtualTileReg())
+    BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
 }
 
 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -113,6 +113,10 @@
   /// other tools to detect the extended record.
   bool HasSwiftAsyncContext = false;
 
+  /// True if this function has a virtual tile register. This is used to
+  /// determine if we should insert tilerelease in frame lowering.
+  bool HasVirtualTileReg = false;
+
   Optional SwiftAsyncContextFrameIdx;
 
   ValueMap PreallocatedIds;
@@ -207,6 +211,9 @@
   bool hasSwiftAsyncContext() const { return HasSwiftAsyncContext; }
   void setHasSwiftAsyncContext(bool v) { HasSwiftAsyncContext = v; }
 
+  bool hasVirtualTileReg() const { return HasVirtualTileReg; }
+  void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; }
+
   Optional getSwiftAsyncContextFrameIdx() const {
     return SwiftAsyncContextFrameIdx;
   }
diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp
--- a/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -25,6 +25,7 @@
 
 #include "X86.h"
 #include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
 #include "X86RegisterInfo.h"
 #include "X86Subtarget.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -235,6 +236,7 @@
   const TargetInstrInfo *TII = ST.getInstrInfo();
   const TargetRegisterInfo *TRI = ST.getRegisterInfo();
   const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
 
   BitVector AMXRegs(TRI->getNumRegs());
   for (unsigned I = 0; I < RC->getNumRegs(); I++)
@@ -294,6 +296,7 @@
   // There's no AMX instruction if we didn't find a tile config live in point.
   if (CfgNeedInsert.empty())
     return false;
+  X86FI->setHasVirtualTileReg(true);
 
   // Avoid to insert ldtilecfg before any shape defs.
   SmallVector WorkList;
diff --git a/llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll
--- a/llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-bf16-intrinsics.ll
@@ -5,7 +5,6 @@
 ; CHECK-LABEL: test_amx:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: tdpbf16ps %tmm7, %tmm4, %tmm3
-; CHECK-NEXT: tilerelease
 ; CHECK-NEXT: retq
   call void @llvm.x86.tdpbf16ps(i8 3, i8 4, i8 7)
   ret void
diff --git a/llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll
--- a/llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-int8-intrinsics.ll
@@ -8,7 +8,6 @@
 ; CHECK-NEXT: tdpbsud %tmm7, %tmm4, %tmm3
 ; CHECK-NEXT: tdpbusd %tmm7, %tmm0, %tmm3
 ; CHECK-NEXT: tdpbuud %tmm1, %tmm4, %tmm3
-; CHECK-NEXT: tilerelease
 ; CHECK-NEXT: retq
   call void @llvm.x86.tdpbssd(i8 3, i8 4, i8 7)
 
diff --git a/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll
--- a/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll
@@ -11,7 +11,6 @@
 ; CHECK-NEXT: tileloadd (%rsi,%rdx), %tmm3
 ; CHECK-NEXT: tileloaddt1 (%rsi,%rdx), %tmm3
 ; CHECK-NEXT: tilestored %tmm3, (%rsi,%rdx)
-; CHECK-NEXT: tilerelease
 ; CHECK-NEXT: retq
   call void @llvm.x86.ldtilecfg(i8* %pointer)
 