diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp
--- a/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -378,36 +378,31 @@
   // Zero stack slot.
   MachineBasicBlock &MBB = MF.front();
   MachineInstr *MI = &*MBB.begin();
+  auto ZeroSS = [&](unsigned Opc) {
+    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
+    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
+    addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), SS).addReg(Xmm);
+    addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), SS, 16).addReg(Xmm);
+    addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), SS, 32).addReg(Xmm);
+    addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), SS, 48).addReg(Xmm);
+  };
   if (ST.hasAVX512()) {
     Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
-    BuildMI(MBB, MI, DL, TII->get(X86::VPXORDZrr), Zmm)
-        .addReg(Zmm, RegState::Undef)
-        .addReg(Zmm, RegState::Undef);
+    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), SS)
         .addReg(Zmm);
   } else if (ST.hasAVX2()) {
     Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
-    BuildMI(MBB, MI, DL, TII->get(X86::VPXORYrr), Ymm)
-        .addReg(Ymm, RegState::Undef)
-        .addReg(Ymm, RegState::Undef);
+    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS)
         .addReg(Ymm);
     addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS, 32)
         .addReg(Ymm);
+  } else if (ST.hasAVX()) {
+    ZeroSS(X86::VMOVUPSmr);
   } else {
     assert(ST.hasSSE2() && "AMX should assume SSE2 enabled");
-    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
-    BuildMI(MBB, MI, DL, TII->get(X86::PXORrr), Xmm)
-        .addReg(Xmm, RegState::Undef)
-        .addReg(Xmm, RegState::Undef);
-    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS)
-        .addReg(Xmm);
-    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 16)
-        .addReg(Xmm);
-    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 32)
-        .addReg(Xmm);
-    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 48)
-        .addReg(Xmm);
+    ZeroSS(X86::MOVUPSmr);
   }
   // Fill in the palette first.
   addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), SS).addImm(1);
diff --git a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll
--- a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll
@@ -26,8 +26,8 @@
 ; CHECK-NEXT:    subq $2120, %rsp # imm = 0x848
 ; CHECK-NEXT:    movl %esi, %ebx
 ; CHECK-NEXT:    movl %edi, %ebp
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, (%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, (%rsp)
 ; CHECK-NEXT:    movb $1, (%rsp)
 ; CHECK-NEXT:    movw $8, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $8, {{[0-9]+}}(%rsp)
@@ -68,8 +68,8 @@
 ; IPRA-LABEL: test_api:
 ; IPRA:       # %bb.0:
 ; IPRA-NEXT:    subq $72, %rsp
-; IPRA-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; IPRA-NEXT:    vmovdqu64 %zmm0, {{[0-9]+}}(%rsp)
+; IPRA-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; IPRA-NEXT:    vmovups %zmm0, {{[0-9]+}}(%rsp)
 ; IPRA-NEXT:    movb $1, {{[0-9]+}}(%rsp)
 ; IPRA-NEXT:    movw $8, {{[0-9]+}}(%rsp)
 ; IPRA-NEXT:    movb $8, {{[0-9]+}}(%rsp)
@@ -113,8 +113,8 @@
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    subq $1096, %rsp # imm = 0x448
 ; CHECK-NEXT:    movl %edi, %r14d
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, (%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, (%rsp)
 ; CHECK-NEXT:    movb $1, (%rsp)
 ; CHECK-NEXT:    movb $8, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw $8, {{[0-9]+}}(%rsp)
@@ -179,8 +179,8 @@
 ; IPRA:       # %bb.0:
 ; IPRA-NEXT:    subq $72, %rsp
 ; IPRA-NEXT:    movl %edi, %eax
-; IPRA-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; IPRA-NEXT:    vmovdqu64 %zmm0, {{[0-9]+}}(%rsp)
+; IPRA-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; IPRA-NEXT:    vmovups %zmm0, {{[0-9]+}}(%rsp)
 ; IPRA-NEXT:    movb $1, {{[0-9]+}}(%rsp)
 ; IPRA-NEXT:    movb $8, {{[0-9]+}}(%rsp)
 ; IPRA-NEXT:    movw $8, {{[0-9]+}}(%rsp)
@@ -272,8 +272,8 @@
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    subq $1088, %rsp # imm = 0x440
 ; CHECK-NEXT:    movl %edi, %ebx
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, (%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, (%rsp)
 ; CHECK-NEXT:    movb $1, (%rsp)
 ; CHECK-NEXT:    movb $8, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw $8, {{[0-9]+}}(%rsp)
@@ -312,8 +312,8 @@
 ; IPRA-LABEL: test_loop2:
 ; IPRA:       # %bb.0:
 ; IPRA-NEXT:    subq $72, %rsp
-; IPRA-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; IPRA-NEXT:    vmovdqu64 %zmm0, {{[0-9]+}}(%rsp)
+; IPRA-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; IPRA-NEXT:    vmovups %zmm0, {{[0-9]+}}(%rsp)
 ; IPRA-NEXT:    movb $1, {{[0-9]+}}(%rsp)
 ; IPRA-NEXT:    movb $8, {{[0-9]+}}(%rsp)
 ; IPRA-NEXT:    movw $8, {{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/AMX/amx-config.ll b/llvm/test/CodeGen/X86/AMX/amx-config.ll
--- a/llvm/test/CodeGen/X86/AMX/amx-config.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-config.ll
@@ -1,17 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefix=AVX512
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx2 -verify-machineinstrs | FileCheck %s --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8,+avx512f -verify-machineinstrs | FileCheck %s --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8,+avx2 -verify-machineinstrs | FileCheck %s --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8,+avx -verify-machineinstrs | FileCheck %s --check-prefix=AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -verify-machineinstrs | FileCheck %s --check-prefix=SSE2
 
 @buf = dso_local global [1024 x i8] zeroinitializer, align 64
 @buf2 = dso_local global [1024 x i8] zeroinitializer, align 64
 
 ; Function Attrs: nounwind uwtable
-define dso_local void @test_api(i32 %0, i16 signext %1, i16 signext %2) {
+define <4 x i32> @test_api(i32 %0, i16 signext %1, i16 signext %2, <4 x i32> %xmm0) {
 ; AVX512-LABEL: test_api:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; AVX512-NEXT:    vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT:    vmovups %zmm1, -{{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
@@ -43,9 +44,9 @@
 ;
 ; AVX2-LABEL: test_api:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vxorps %ymm0, %ymm0, %ymm0
-; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
 ; AVX2-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
@@ -75,13 +76,48 @@
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
+; AVX1-LABEL: test_api:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vmovups %xmm1, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    vmovups %xmm1, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    vmovups %xmm1, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    vmovups %xmm1, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    movw %si, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    testl %edi, %edi
+; AVX1-NEXT:    movsbl %sil, %eax
+; AVX1-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    ldtilecfg -{{[0-9]+}}(%rsp)
+; AVX1-NEXT:    je .LBB0_2
+; AVX1-NEXT:  # %bb.1:
+; AVX1-NEXT:    movl $buf, %ecx
+; AVX1-NEXT:    jmp .LBB0_3
+; AVX1-NEXT:  .LBB0_2:
+; AVX1-NEXT:    movl $buf2, %ecx
+; AVX1-NEXT:  .LBB0_3:
+; AVX1-NEXT:    movl $32, %edi
+; AVX1-NEXT:    tileloadd (%rcx,%rdi), %tmm0
+; AVX1-NEXT:    tileloadd (%rcx,%rdi), %tmm2
+; AVX1-NEXT:    tileloadd (%rcx,%rdi), %tmm1
+; AVX1-NEXT:    tdpbssd %tmm2, %tmm0, %tmm1
+; AVX1-NEXT:    movl $buf, %ecx
+; AVX1-NEXT:    movl $32, %esi
+; AVX1-NEXT:    tilestored %tmm1, (%rcx,%rsi)
+; AVX1-NEXT:    tilerelease
+; AVX1-NEXT:    retq
+;
 ; SSE2-LABEL: test_api:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    xorps %xmm0, %xmm0
-; SSE2-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    movups %xmm1, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT:    movups %xmm1, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT:    movups %xmm1, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT:    movups %xmm1, -{{[0-9]+}}(%rsp)
 ; SSE2-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
 ; SSE2-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
 ; SSE2-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
@@ -132,11 +168,9 @@
   %18 = phi x86_amx [ %14, %11 ], [ %10, %7 ]
   %19 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %6, i16 %2, i16 %1, x86_amx %18, x86_amx %16, x86_amx %17)
   tail call void @llvm.x86.tilestored64.internal(i16 %6, i16 %2, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32, x86_amx %19)
-  ret void
+  ret <4 x i32> %xmm0
 }
 
 declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64)
-
 declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
-
 declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
diff --git a/llvm/test/CodeGen/X86/AMX/amx-intrinsic-chain.ll b/llvm/test/CodeGen/X86/AMX/amx-intrinsic-chain.ll
--- a/llvm/test/CodeGen/X86/AMX/amx-intrinsic-chain.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-intrinsic-chain.ll
@@ -4,8 +4,8 @@
 define dso_local void @test_chain(i8* %A_mem, i8* %B_mem, i8* %C_mem) {
 ; CHECK-LABEL: test_chain:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $16, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw $64, -{{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
--- a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
@@ -5,8 +5,8 @@
 define dso_local void @test1(i16 signext %0, i16 signext %1) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw $8, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $8, -{{[0-9]+}}(%rsp)
@@ -46,8 +46,8 @@
 ; CHECK-NEXT:    subq $72, %rsp
 ; CHECK-NEXT:    movl %esi, %ebx
 ; CHECK-NEXT:    movl %edi, %ebp
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $1, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw $8, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $8, {{[0-9]+}}(%rsp)
@@ -119,8 +119,8 @@
 define dso_local void @test3(i16 signext %0, i16 signext %1) nounwind {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw %si, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
@@ -162,8 +162,8 @@
 define dso_local void @test4(i16 signext %0, i16 signext %1) nounwind {
 ; CHECK-LABEL: test4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw %si, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
@@ -231,8 +231,8 @@
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw %si, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
@@ -292,8 +292,8 @@
 ; CHECK-LABEL: test6:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    xorl %r8d, %r8d
diff --git a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
--- a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
@@ -9,8 +9,8 @@
 ; CHECK-NEXT:    pushq %r14
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    subq $4056, %rsp # imm = 0xFD8
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $1, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $8, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw $8, {{[0-9]+}}(%rsp)
@@ -96,8 +96,8 @@
 ; CHECK-NEXT:    pushq %r14
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    subq $72, %rsp
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $1, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $8, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw $8, {{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
--- a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
@@ -13,8 +13,8 @@
 ; CHECK-NEXT:    subq $2120, %rsp # imm = 0x848
 ; CHECK-NEXT:    movl %esi, %ebx
 ; CHECK-NEXT:    movl %edi, %ebp
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, (%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, (%rsp)
 ; CHECK-NEXT:    movb $1, (%rsp)
 ; CHECK-NEXT:    movw $8, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $8, {{[0-9]+}}(%rsp)
@@ -121,8 +121,8 @@
 ; CHECK-NEXT:    pushq %r14
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    subq $72, %rsp
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $1, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $8, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw $8, {{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/AMX/amx-spill.ll b/llvm/test/CodeGen/X86/AMX/amx-spill.ll
--- a/llvm/test/CodeGen/X86/AMX/amx-spill.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-spill.ll
@@ -8,8 +8,8 @@
 ; CHECK-LABEL: test_api:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    subq $968, %rsp # imm = 0x3C8
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw %dx, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
--- a/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
@@ -4,8 +4,8 @@
 define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
 ; CHECK-LABEL: test_amx:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %zmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $8, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw $8, -{{[0-9]+}}(%rsp)
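
Note on the X86PreTileConfig.cpp hunk above: with the `V_SET0`/`AVX_SET0`/`AVX512_512_SET0` pseudos, zeroing the 64-byte ldtilecfg area reduces to one decision, namely picking the widest store the subtarget supports and emitting enough of them to cover 64 bytes (an xmm-sized zero idiom suffices even for the zmm store, since writing an xmm register zeroes the upper bits, which is why the checks now expect `vxorps %xmm0`). Below is a minimal standalone C++ sketch of that dispatch, with `memset` calls standing in for the emitted vector stores; the `Feature` enum and `zeroTileConfigArea` names are illustrative only, not LLVM API:

```cpp
#include <cstddef>
#include <cstring>

// Illustrative stand-in for the subtarget feature tiers (not LLVM API).
enum class Feature { SSE2, AVX, AVX2, AVX512 };

// Zero the 64-byte tile-config block the way the patched pass does:
// one 64-byte store (AVX-512), two 32-byte stores (AVX2), or four
// 16-byte stores (AVX/SSE2, the ZeroSS path). Hypothetical helper.
void zeroTileConfigArea(unsigned char *Area, Feature F) {
  const std::size_t Size = 64; // ldtilecfg reads a 64-byte configuration
  std::size_t Chunk = 16;      // movups/vmovups %xmm (SSE2/AVX)
  if (F == Feature::AVX512)
    Chunk = 64;                // vmovups %zmm
  else if (F == Feature::AVX2)
    Chunk = 32;                // vmovups %ymm
  for (std::size_t Off = 0; Off != Size; Off += Chunk)
    std::memset(Area + Off, 0, Chunk); // one vector store at SS + Off
}
```

With AVX2, for example, the loop performs stores at offsets 0 and 32, matching the pair of `vmovups %ymm1` checks in amx-config.ll; with plain SSE2 or AVX it stores at 0, 16, 32, and 48, matching the four `ZeroSS` frame references.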