diff --git a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp --- a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp @@ -515,7 +515,7 @@ CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(), ST->getTileConfigAlignment(), false); LastTileCfg = addFrameReference( - BuildMI(MBB, Before, DebugLoc(), TII->get(X86::LDTILECFG)), CfgSS); + BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS); LastShapeMI = nullptr; Change = true; }; diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp --- a/llvm/lib/Target/X86/X86FastTileConfig.cpp +++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp @@ -110,15 +110,15 @@ bool Change = false; SmallVector, 6> ShapeInfos; for (MachineInstr &MI : reverse(MBB)) { - if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::LDTILECFG) + if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV) continue; // AMX instructions that define tile register. - if (MI.getOpcode() != X86::LDTILECFG) { + if (MI.getOpcode() != X86::PLDTILECFGV) { MachineOperand &Row = MI.getOperand(1); MachineOperand &Col = MI.getOperand(2); unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0; ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)}); - } else { // LDTILECFG + } else { // PLDTILECFGV // Rewrite the shape information to memory. Stack slot should have // been initialized to zero in pre config. int SS = MI.getOperand(0).getIndex(); // tile config stack slot. diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td --- a/llvm/lib/Target/X86/X86InstrAMX.td +++ b/llvm/lib/Target/X86/X86InstrAMX.td @@ -49,8 +49,7 @@ // Pseduo instruction for RA. let isPseudo = true, mayLoad = 1 in - def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), - [(int_x86_ldtilecfg_internal addr:$src)]>; + def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>; let isPseudo = true, mayLoad = 1 in def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -7360,7 +7360,7 @@ // ENDBR instructions should not be scheduled around. unsigned Opcode = MI.getOpcode(); if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 || - Opcode == X86::LDTILECFG) + Opcode == X86::PLDTILECFGV) return true; return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF); diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp --- a/llvm/lib/Target/X86/X86PreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp @@ -369,7 +369,7 @@ // multi insert. if (VisitedOrInserted.insert(I).second) { auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin(); - addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::LDTILECFG)), + addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::PLDTILECFGV)), SS); } } diff --git a/llvm/lib/Target/X86/X86TileConfig.cpp b/llvm/lib/Target/X86/X86TileConfig.cpp --- a/llvm/lib/Target/X86/X86TileConfig.cpp +++ b/llvm/lib/Target/X86/X86TileConfig.cpp @@ -90,7 +90,7 @@ int SS = INT_MAX; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { - if (MI.getOpcode() == X86::LDTILECFG) { + if (MI.getOpcode() == X86::PLDTILECFGV) { SS = MI.getOperand(0).getIndex(); break; } @@ -98,7 +98,7 @@ if (SS != INT_MAX) break; } - // Didn't find LDTILECFG, just return false; + // Didn't find PLDTILECFGV, just return false; if (SS == INT_MAX) return false; diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir @@ -114,7 +114,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]] ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -127,7 +127,7 @@ ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 32 ; CHECK-NEXT: [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri3:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri3]], [[MOV16ri2]], [[COPY3]], 1, killed [[MOV32ri64_]], 0, $noreg ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -139,7 +139,7 @@ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[MOV16ri2]], %bb.2 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr16 = PHI [[MOV16ri1]], %bb.1, [[MOV16ri3]], %bb.2 ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.1, [[LEA64r1]], %bb.2 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.5, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri2]], 0, $noreg @@ -147,13 +147,13 @@ ; CHECK-NEXT: TILESTORED %stack.5, 1, killed [[MOV64ri3]], 0, $noreg, [[PTILELOADDV1]] :: (store (s8192) into %stack.5) ; CHECK-NEXT: [[MOV16ri4:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri5:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[PTILEZEROV1:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri5]], [[MOV16ri4]] ; CHECK-NEXT: [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: TILESTORED %stack.4, 1, killed [[MOV64ri4]], 0, $noreg, [[PTILEZEROV1]] :: (store (s8192) into %stack.4) ; CHECK-NEXT: [[MOV16ri6:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri7:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[PTILEZEROV2:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri7]], [[MOV16ri6]] ; CHECK-NEXT: [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: TILESTORED %stack.3, 1, killed [[MOV64ri5]], 0, $noreg, [[PTILEZEROV2]] :: (store (s8192) into %stack.3) @@ -161,7 +161,7 @@ ; CHECK-NEXT: JMP_1 %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32 ; CHECK-NEXT: [[MOV16ri8:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri9:%[0-9]+]]:gr16 = MOV16ri 16 @@ -177,14 +177,14 @@ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI [[PHI]], %bb.3, %60, %bb.8 ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr16 = PHI [[PHI1]], %bb.3, %59, %bb.8 ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gr64_nosp = PHI [[LEA64r2]], %bb.3, %58, %bb.8 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri7:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[PHI5]], [[PHI4]], [[PHI6]], 1, killed [[MOV64ri7]], 0, $noreg ; CHECK-NEXT: [[MOV64ri8:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: TILESTORED %stack.8, 1, killed [[MOV64ri8]], 0, $noreg, [[PTILELOADDV3]] :: (store (s8192) into %stack.8) ; CHECK-NEXT: [[MOV16ri10:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri11:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri9:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV4:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri7]], [[MOV16ri6]], %stack.3, 1, killed [[MOV64ri9]], 0, $noreg :: (load (s8192) from %stack.3) ; CHECK-NEXT: [[MOV64ri10:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -200,7 +200,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[MOV16ri12:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri13:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.6, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILEZEROV3:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri13]], [[MOV16ri12]] ; CHECK-NEXT: [[MOV64ri12:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -213,7 +213,7 @@ ; CHECK-NEXT: [[MOV32ri64_2:%[0-9]+]]:gr64_nosp = MOV32ri64 32 ; CHECK-NEXT: [[MOV16ri14:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri15:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.7, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILELOADDV7:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri15]], [[MOV16ri14]], [[COPY3]], 1, killed [[MOV32ri64_2]], 0, $noreg ; CHECK-NEXT: [[MOV64ri13:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -225,7 +225,7 @@ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gr16 = PHI [[MOV16ri12]], %bb.6, [[MOV16ri14]], %bb.7 ; CHECK-NEXT: [[PHI8:%[0-9]+]]:gr16 = PHI [[MOV16ri13]], %bb.6, [[MOV16ri15]], %bb.7 ; CHECK-NEXT: [[PHI9:%[0-9]+]]:gr64_nosp = PHI [[LEA64r3]], %bb.6, [[LEA64r4]], %bb.7 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri14:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV8:%[0-9]+]]:tile = PTILELOADDV [[PHI8]], [[PHI7]], [[PHI9]], 1, killed [[MOV64ri14]], 0, $noreg ; CHECK-NEXT: [[MOV64ri15:%[0-9]+]]:gr64_nosp = MOV64ri 64 diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi2.mir @@ -51,7 +51,7 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed [[COPY]] ; CHECK-NEXT: %r0:gr16 = MOV16ri 64 ; CHECK-NEXT: %c0:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg ; CHECK-NEXT: %t0:tile = PTILEZEROV %r0, %c0 ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -66,7 +66,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]] ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -79,7 +79,7 @@ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr16 = PHI %c0, %bb.0, %24, %bb.3 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr16 = PHI %r0, %bb.0, %23, %bb.3 ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.0, %22, %bb.3 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri2]], 0, $noreg ; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -92,7 +92,7 @@ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[PHI]], %bb.2 ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI [[MOV16ri1]], %bb.1, [[PHI1]], %bb.2 ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr64_nosp = PHI [[LEA64r1]], %bb.1, [[PHI2]], %bb.2 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI4]], [[PHI3]], [[PHI5]], 1, killed [[MOV64ri4]], 0, $noreg ; CHECK-NEXT: [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64 diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi4.mir @@ -52,7 +52,7 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed [[COPY]] ; CHECK-NEXT: %r0:gr16 = MOV16ri 64 ; CHECK-NEXT: %c0:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg ; CHECK-NEXT: %t0:tile = PTILEZEROV %r0, %c0 @@ -68,7 +68,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV killed [[MOV16ri1]], killed [[MOV16ri]] ; CHECK-NEXT: JMP_1 %bb.3 ; CHECK-NEXT: {{ $}} @@ -81,14 +81,14 @@ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gr16 = PHI %c0, %bb.0, %11, %bb.3, %17, %bb.2 ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI %r0, %bb.0, %12, %bb.3, %18, %bb.2 ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.0, %24, %bb.3, %25, %bb.2 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri1]], 0, $noreg ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI4]], [[PHI3]], [[PHI5]], 1, killed [[MOV64ri2]], 0, $noreg ; CHECK-NEXT: [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri3:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.3, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.3, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILEZEROV1:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri3]], [[MOV16ri2]] @@ -102,7 +102,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[MOV16ri4:%[0-9]+]]:gr16 = MOV16ri 64 ; CHECK-NEXT: [[MOV16ri5:%[0-9]+]]:gr16 = MOV16ri 16 - ; CHECK-NEXT: LDTILECFG %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.1, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg :: (load (s512) from %stack.1, align 4) ; CHECK-NEXT: [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[LEA64r5:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILEZEROV2:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri5]], [[MOV16ri4]] diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig-spill.mir @@ -34,7 +34,7 @@ ; CHECK-NEXT: MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4) ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 32 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8 - ; CHECK-NEXT: LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg :: (load (s512) from %stack.4, align 4) ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]] ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: TILESTORED %stack.3, 1, killed [[MOV64ri]], 0, $noreg, [[PTILEZEROV]] :: (store (s8192) into %stack.3) @@ -48,7 +48,7 @@ ; CHECK-NEXT: JMP_1 %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg :: (load (s512) from %stack.4, align 4) ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV %row, %col, [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], %stack.2, 1, killed [[MOV64ri2]], 0, $noreg :: (load (s8192) from %stack.2) @@ -111,7 +111,7 @@ ; CHECK-NEXT: MOV8mi %stack.4, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.4, align 4) ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 32 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8 - ; CHECK-NEXT: LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg :: (load (s512) from %stack.4, align 4) ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]] ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: TILESTORED %stack.3, 1, killed [[MOV64ri]], 0, $noreg, [[PTILEZEROV]] :: (store (s8192) into %stack.3) @@ -123,7 +123,7 @@ ; CHECK-NEXT: JMP_1 %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: LDTILECFG %stack.4, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.4, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.4, 1, $noreg, 0, $noreg :: (load (s512) from %stack.4, align 4) ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: %t:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], %stack.2, 1, killed [[MOV64ri2]], 0, $noreg :: (load (s8192) from %stack.2) diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir --- a/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastconfig.mir @@ -102,7 +102,7 @@ ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 @buf ; CHECK-NEXT: [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32 ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 8 - ; CHECK-NEXT: LDTILECFG %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.3, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg :: (load (s512) from %stack.3, align 4) ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[MOV16ri]], [[MOV32ri64_]], 1, [[MOV32ri64_1]], 0, $noreg ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -123,7 +123,7 @@ ; CHECK-NEXT: [[MOV32ri64_2:%[0-9]+]]:gr64 = MOV32ri64 @buf2 ; CHECK-NEXT: [[MOV32ri64_3:%[0-9]+]]:gr64_nosp = MOV32ri64 32 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8 - ; CHECK-NEXT: LDTILECFG %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.3, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg :: (load (s512) from %stack.3, align 4) ; CHECK-NEXT: [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.6, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[MOV16ri1]], [[MOV32ri64_2]], 1, [[MOV32ri64_3]], 0, $noreg ; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64 @@ -147,7 +147,7 @@ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gr16 = PHI [[COPY3]], %bb.1, [[COPY3]], %bb.2 ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gr16 = PHI [[COPY4]], %bb.1, [[COPY4]], %bb.2 ; CHECK-NEXT: [[PHI8:%[0-9]+]]:gr64_nosp = PHI [[LEA64r2]], %bb.1, [[LEA64r5]], %bb.2 - ; CHECK-NEXT: LDTILECFG %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.3, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg :: (load (s512) from %stack.3, align 4) ; CHECK-NEXT: [[MOV64ri6:%[0-9]+]]:gr64_nosp = MOV64ri 64 ; CHECK-NEXT: [[PTILELOADDV6:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri6]], 0, $noreg ; CHECK-NEXT: [[MOV64ri7:%[0-9]+]]:gr64_nosp = MOV64ri 64 diff --git a/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir b/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir --- a/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir +++ b/llvm/test/CodeGen/X86/AMX/amx-fastpreconfig.mir @@ -34,7 +34,7 @@ ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.0, 1, $noreg, 0, $noreg ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 32 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8 - ; CHECK-NEXT: LDTILECFG %stack.2, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load store (s512) on %stack.2, align 4) + ; CHECK-NEXT: PLDTILECFGV %stack.2, 1, $noreg, 0, $noreg :: (load (s512) from %stack.2, align 4) ; CHECK-NEXT: $tmm0 = TILELOADD [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], 1, [[MOV32ri64_]], 0, $noreg diff --git a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll --- a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll @@ -38,9 +38,9 @@ ; CHECK-NEXT: tilestored %tmm3, 3024(%rsp,%rax) # 1024-byte Folded Spill ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: callq foo -; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; CHECK-NEXT: movabsq $64, %rax ; CHECK-NEXT: tileloadd 3024(%rsp,%rax), %tmm3 # 1024-byte Folded Reload +; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; CHECK-NEXT: tileloadd (%rbx,%r15), %tmm0 ; CHECK-NEXT: tileloadd (%rbx,%r15), %tmm1 ; CHECK-NEXT: # implicit-def: $rax @@ -121,8 +121,8 @@ ; CHECK-NEXT: tilezero %tmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: callq foo -; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; CHECK-NEXT: tilezero %tmm2 +; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) ; CHECK-NEXT: tileloadd (%rbx,%r15), %tmm0 ; CHECK-NEXT: tileloadd (%rbx,%r15), %tmm1 ; CHECK-NEXT: tdpbssd %tmm1, %tmm0, %tmm2 diff --git a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll --- a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll @@ -51,9 +51,9 @@ ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: callq foo -; CHECK-NEXT: ldtilecfg (%rsp) ; CHECK-NEXT: movabsq $64, %rax ; CHECK-NEXT: tileloadd 64(%rsp,%rax), %tmm6 # 1024-byte Folded Reload +; CHECK-NEXT: ldtilecfg (%rsp) ; CHECK-NEXT: jmp .LBB0_3 ; CHECK-NEXT: .LBB0_2: # %if.false ; CHECK-NEXT: movl $buf, %eax @@ -69,9 +69,9 @@ ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: callq foo -; CHECK-NEXT: ldtilecfg (%rsp) ; CHECK-NEXT: movabsq $64, %rax ; CHECK-NEXT: tileloadd 64(%rsp,%rax), %tmm6 # 1024-byte Folded Reload +; CHECK-NEXT: ldtilecfg (%rsp) ; CHECK-NEXT: tilestored %tmm6, (%r15,%r14) ; CHECK-NEXT: .LBB0_3: # %exit ; CHECK-NEXT: movl $buf, %eax diff --git a/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll --- a/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile -verify-machineinstrs | FileCheck %s define void @test_amx(i8* %pointer, i8* %base, i64 %stride) { ; CHECK-LABEL: test_amx: