diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp
--- a/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -103,9 +103,8 @@ const X86Subtarget *ST) {
   auto *MBB = MI->getParent();
 
-  // FIXME: AMX should assume AVX512 enabled.
+  // Zero stack slot.
   if (ST->hasAVX512()) {
-    // Zero stack slot.
     Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
     BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VPXORDZrr), Zmm)
         .addReg(Zmm, RegState::Undef)
         .addReg(Zmm, RegState::Undef);
@@ -113,6 +112,35 @@
     addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VMOVUPSZmr)),
                       FrameIdx)
         .addReg(Zmm);
+  } else if (ST->hasAVX2()) {
+    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
+    BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VPXORYrr), Ymm)
+        .addReg(Ymm, RegState::Undef)
+        .addReg(Ymm, RegState::Undef);
+    addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VMOVUPSYmr)),
+                      FrameIdx)
+        .addReg(Ymm);
+    addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VMOVUPSYmr)),
+                      FrameIdx, 32)
+        .addReg(Ymm);
+  } else {
+    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
+    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
+    BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::PXORrr), Xmm)
+        .addReg(Xmm, RegState::Undef)
+        .addReg(Xmm, RegState::Undef);
+    addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::MOVUPSmr)),
+                      FrameIdx)
+        .addReg(Xmm);
+    addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::MOVUPSmr)),
+                      FrameIdx, 16)
+        .addReg(Xmm);
+    addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::MOVUPSmr)),
+                      FrameIdx, 32)
+        .addReg(Xmm);
+    addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::MOVUPSmr)),
+                      FrameIdx, 48)
+        .addReg(Xmm);
   }
 
   // build psuedo ldtilecfg
diff --git a/llvm/test/CodeGen/X86/AMX/amx-config.ll b/llvm/test/CodeGen/X86/AMX/amx-config.ll
--- a/llvm/test/CodeGen/X86/AMX/amx-config.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-config.ll
@@ -45,6 +45,9 @@
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    testl %edi, %edi
 ; AVX2-NEXT:    movsbl %sil, %eax
+; AVX2-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT:    movw %si, -{{[0-9]+}}(%rsp)
@@ -69,12 +72,18 @@
 ; AVX2-NEXT:    movl $32, %esi
 ; AVX2-NEXT:    tilestored %tmm1, (%rcx,%rsi)
 ; AVX2-NEXT:    tilerelease
+; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; SSE2-LABEL: test_api:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    testl %edi, %edi
 ; SSE2-NEXT:    movsbl %sil, %eax
+; SSE2-NEXT:    xorps %xmm0, %xmm0
+; SSE2-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp)
 ; SSE2-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
 ; SSE2-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
 ; SSE2-NEXT:    movw %si, -{{[0-9]+}}(%rsp)