diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h --- a/llvm/include/llvm/CodeGen/TargetPassConfig.h +++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h @@ -404,10 +404,6 @@ return false; } - /// addPostFastRegAllocRewrite - Add passes to the optimized register - /// allocation pipeline after fast register allocation is complete. - virtual bool addPostFastRegAllocRewrite() { return false; } - /// Add passes to be run immediately after virtual registers are rewritten /// to physical registers. virtual void addPostRewrite() { } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1414,9 +1414,6 @@ addPass(createRegAllocPass(false)); - // Allow targets to change the register assignments after - // fast register allocation. - addPostFastRegAllocRewrite(); return true; } diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp --- a/llvm/lib/Target/X86/X86FastTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp @@ -146,11 +146,10 @@ int RowOffset = 48 + TMMIdx; int ColOffset = 16 + TMMIdx * 2; - Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit); - BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg); MachineInstrBuilder StoreRow = BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr)); - addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg); + addFrameReference(StoreRow, SS, RowOffset) + .addReg(RowReg, 0, X86::sub_8bit); MachineInstrBuilder StoreCol = BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr)); diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -385,7 +385,6 @@ bool addPreISel() override; void addMachineSSAOptimization() override; void addPreRegAlloc() override; - bool addPostFastRegAllocRewrite() override; void addPostRegAlloc() override; void addPreEmitPass() override; void addPreEmitPass2() override; @@ -619,16 +618,13 @@ bool X86PassConfig::addRegAssignAndRewriteFast() { // Allocate AMX registers separately. - if (EnableTileRAPass) + if (EnableTileRAPass) { addPass(createFastRegisterAllocator(onlyAllocateTileRegisters, false)); + addPass(createX86FastTileConfigPass()); + } return TargetPassConfig::addRegAssignAndRewriteFast(); } -bool X86PassConfig::addPostFastRegAllocRewrite() { - addPass(createX86FastTileConfigPass()); - return true; -} - bool X86PassConfig::addPreRewrite() { addPass(createX86TileConfigPass()); return true; diff --git a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll --- a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll @@ -123,7 +123,6 @@ ; O0-NEXT: movl $buf, %r8d ; O0-NEXT: movl $32, %r9d ; O0-NEXT: movw $8, %si -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %si, {{[0-9]+}}(%rsp) ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -134,7 +133,6 @@ ; O0-NEXT: movl $32, %edi ; O0-NEXT: movl $buf+1024, %esi ; O0-NEXT: movw $8, %ax -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -150,7 +148,6 @@ ; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx # 2-byte Reload ; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -161,7 +158,6 @@ ; O0-NEXT: tilestored %tmm0, (%rsi,%rdi) ; O0-NEXT: movl $64, %r10d ; O0-NEXT: movw $8, %di -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %di, {{[0-9]+}}(%rsp) ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -170,19 +166,14 @@ ; O0-NEXT: tilestored %tmm0, 1024(%rsp,%r8) # 1024-byte Folded Spill ; O0-NEXT: movl $64, %r10d ; O0-NEXT: movw $8, %r8w -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %di, {{[0-9]+}}(%rsp) -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; O0-NEXT: # implicit-def: $r8b ; O0-NEXT: movb %r8b, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -373,7 +364,6 @@ ; O0-NEXT: movl $buf, %edx ; O0-NEXT: movl $32, %esi ; O0-NEXT: movw $8, %ax -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp) ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -385,7 +375,6 @@ ; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; O0-NEXT: movl $64, %edx ; O0-NEXT: movw $8, %ax -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp) ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -408,7 +397,6 @@ ; O0-NEXT: movl $32, %esi ; O0-NEXT: movl $buf+1024, %edx ; O0-NEXT: movw $8, %ax -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp) ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -418,7 +406,6 @@ ; O0-NEXT: tilestored %tmm0, (%rcx,%rdx) ; O0-NEXT: movl $64, %edx ; O0-NEXT: movw $8, %ax -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp) ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -587,7 +574,6 @@ ; O0-NEXT: movl $buf, %edx ; O0-NEXT: movl $32, %esi ; O0-NEXT: movw $8, %ax -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp) ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -599,7 +585,6 @@ ; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; O0-NEXT: movl $64, %edx ; O0-NEXT: movw $8, %ax -; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp) ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) diff --git a/llvm/test/CodeGen/X86/AMX/amx-configO0toO0.ll b/llvm/test/CodeGen/X86/AMX/amx-configO0toO0.ll --- a/llvm/test/CodeGen/X86/AMX/amx-configO0toO0.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-configO0toO0.ll @@ -63,7 +63,6 @@ ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -91,7 +90,6 @@ ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -119,7 +117,6 @@ ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -149,7 +146,6 @@ ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -177,7 +173,6 @@ ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -205,7 +200,6 @@ ; AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -521,16 +515,12 @@ ; AVX512-NEXT: movw %r8w, %di ; AVX512-NEXT: shrl $2, %r8d ; AVX512-NEXT: movw %r8w, %r9w -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: # implicit-def: $r9b ; AVX512-NEXT: movb %r9b, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %di, {{[0-9]+}}(%rsp) -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -638,7 +628,6 @@ ; AVX512-NEXT: vmovdqa64 %zmm0, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %ax ; AVX512-NEXT: movw {{[0-9]+}}(%rsp), %cx -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) diff --git a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll --- a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll @@ -37,7 +37,6 @@ ; AVX512-NEXT: movl $buf, %r9d ; AVX512-NEXT: movl $32, %r10d ; AVX512-NEXT: movw $8, %si -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %si, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -48,10 +47,8 @@ ; AVX512-NEXT: movl $buf, %r8d ; AVX512-NEXT: movl $32, %r9d ; AVX512-NEXT: movw $8, %si -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: # implicit-def: $sil ; AVX512-NEXT: movb %sil, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -74,7 +71,6 @@ ; AVX512-NEXT: movl $buf2, %r9d ; AVX512-NEXT: movl $32, %r10d ; AVX512-NEXT: movw $8, %si -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %si, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -85,10 +81,8 @@ ; AVX512-NEXT: movl $buf2, %r8d ; AVX512-NEXT: movl $32, %r9d ; AVX512-NEXT: movw $8, %si -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: # implicit-def: $sil ; AVX512-NEXT: movb %sil, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -110,7 +104,6 @@ ; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload ; AVX512-NEXT: movl $64, %r10d ; AVX512-NEXT: movw $8, %di -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %di, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -119,19 +112,14 @@ ; AVX512-NEXT: tilestored %tmm0, 1024(%rsp,%r8) # 1024-byte Folded Spill ; AVX512-NEXT: movl $64, %r10d ; AVX512-NEXT: movw $8, %r8w -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %di, {{[0-9]+}}(%rsp) -; AVX512-NEXT: # implicit-def: $al ; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; AVX512-NEXT: # implicit-def: $r8b ; AVX512-NEXT: movb %r8b, {{[0-9]+}}(%rsp) ; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp) diff --git a/llvm/test/CodeGen/X86/AMX/amx-zero-config.ll b/llvm/test/CodeGen/X86/AMX/amx-zero-config.ll --- a/llvm/test/CodeGen/X86/AMX/amx-zero-config.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-zero-config.ll @@ -73,7 +73,6 @@ ; AVX512-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; AVX512-O0-NEXT: movw $32, %cx ; AVX512-O0-NEXT: movw $8, %ax -; AVX512-O0-NEXT: # implicit-def: $al ; AVX512-O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -85,7 +84,6 @@ ; AVX512-O0-NEXT: movl $64, %esi ; AVX512-O0-NEXT: movw $32, %cx ; AVX512-O0-NEXT: movw $8, %ax -; AVX512-O0-NEXT: # implicit-def: $al ; AVX512-O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX512-O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX512-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -113,7 +111,6 @@ ; AVX2-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; AVX2-O0-NEXT: movw $32, %cx ; AVX2-O0-NEXT: movw $8, %ax -; AVX2-O0-NEXT: # implicit-def: $al ; AVX2-O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX2-O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX2-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -125,7 +122,6 @@ ; AVX2-O0-NEXT: movl $64, %esi ; AVX2-O0-NEXT: movw $32, %cx ; AVX2-O0-NEXT: movw $8, %ax -; AVX2-O0-NEXT: # implicit-def: $al ; AVX2-O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; AVX2-O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; AVX2-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -155,7 +151,6 @@ ; SSE2-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; SSE2-O0-NEXT: movw $32, %cx ; SSE2-O0-NEXT: movw $8, %ax -; SSE2-O0-NEXT: # implicit-def: $al ; SSE2-O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; SSE2-O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; SSE2-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) @@ -167,7 +162,6 @@ ; SSE2-O0-NEXT: movl $64, %esi ; SSE2-O0-NEXT: movw $32, %cx ; SSE2-O0-NEXT: movw $8, %ax -; SSE2-O0-NEXT: # implicit-def: $al ; SSE2-O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; SSE2-O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; SSE2-O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -45,8 +45,8 @@ ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass ; CHECK-NEXT: Fast Register Allocator -; CHECK-NEXT: Fast Register Allocator ; CHECK-NEXT: Fast Tile Register Configure +; CHECK-NEXT: Fast Register Allocator ; CHECK-NEXT: X86 Lower Tile Copy ; CHECK-NEXT: Bundle Machine CFG Edges ; CHECK-NEXT: X86 FP Stackifier