diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -3688,7 +3688,7 @@ auto Reg = mc2PseudoReg(Src0.getReg()); const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - if (!isGFX940() && isSGPR(Reg, TRI)) { + if (!isGFX90A() && isSGPR(Reg, TRI)) { Error(getRegLoc(Reg, Operands), "source operand must be either a VGPR or an inline constant"); return false; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -822,7 +822,7 @@ if (RC == &AMDGPU::AGPR_32RegClass) { if (AMDGPU::VGPR_32RegClass.contains(SrcReg) || - (ST.hasGFX940Insts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) { + (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) { BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; @@ -949,7 +949,7 @@ if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC)) Opcode = AMDGPU::V_ACCVGPR_MOV_B32; else if (RI.hasVGPRs(SrcRC) || - (ST.hasGFX940Insts() && RI.isSGPRClass(SrcRC))) + (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC))) Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64; else Opcode = AMDGPU::INSTRUCTION_LIST_END; @@ -4647,7 +4647,7 @@ } if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && - !ST.hasGFX940Insts()) { + !ST.hasGFX90AInsts()) { const MachineOperand *Src = getNamedOperand(MI, AMDGPU::OpName::src0); if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) { ErrInfo = "Invalid register class: " @@ -5032,7 +5032,7 @@ RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR) return false; } - if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX940Insts() && + if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() && (int)OpIdx == 
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) && RI.isSGPRReg(MRI, MO->getReg())) return false; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll @@ -22,70 +22,38 @@ ; GCN-NEXT: s_load_dwordx16 s[0:15], s[34:35], 0x0 ; GCN-NEXT: s_load_dwordx16 s[16:31], s[34:35], 0x40 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v4, s0 -; GCN-NEXT: v_accvgpr_write_b32 a0, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s1 -; GCN-NEXT: v_accvgpr_write_b32 a1, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s2 -; GCN-NEXT: v_accvgpr_write_b32 a2, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s3 -; GCN-NEXT: v_accvgpr_write_b32 a3, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s4 -; GCN-NEXT: v_accvgpr_write_b32 a4, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s5 -; GCN-NEXT: v_accvgpr_write_b32 a5, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s6 -; GCN-NEXT: v_accvgpr_write_b32 a6, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s7 -; GCN-NEXT: v_accvgpr_write_b32 a7, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s8 -; GCN-NEXT: v_accvgpr_write_b32 a8, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s9 -; GCN-NEXT: v_accvgpr_write_b32 a9, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s10 -; GCN-NEXT: v_accvgpr_write_b32 a10, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s11 -; GCN-NEXT: v_accvgpr_write_b32 a11, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s12 -; GCN-NEXT: v_accvgpr_write_b32 a12, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s13 -; GCN-NEXT: v_accvgpr_write_b32 a13, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s14 -; GCN-NEXT: v_accvgpr_write_b32 a14, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s15 -; GCN-NEXT: v_accvgpr_write_b32 a15, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s16 -; GCN-NEXT: v_accvgpr_write_b32 a16, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s17 -; GCN-NEXT: v_accvgpr_write_b32 a17, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s18 -; GCN-NEXT: v_accvgpr_write_b32 a18, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s19 
-; GCN-NEXT: v_accvgpr_write_b32 a19, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s20 -; GCN-NEXT: v_accvgpr_write_b32 a20, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s21 -; GCN-NEXT: v_accvgpr_write_b32 a21, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s22 -; GCN-NEXT: v_accvgpr_write_b32 a22, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s23 -; GCN-NEXT: v_accvgpr_write_b32 a23, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s24 -; GCN-NEXT: v_accvgpr_write_b32 a24, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s25 -; GCN-NEXT: v_accvgpr_write_b32 a25, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s26 -; GCN-NEXT: v_accvgpr_write_b32 a26, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s27 -; GCN-NEXT: v_accvgpr_write_b32 a27, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s28 -; GCN-NEXT: v_accvgpr_write_b32 a28, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s29 -; GCN-NEXT: v_accvgpr_write_b32 a29, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s30 -; GCN-NEXT: v_accvgpr_write_b32 a30, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s31 -; GCN-NEXT: v_accvgpr_write_b32 a31, v4 +; GCN-NEXT: v_accvgpr_write_b32 a0, s0 +; GCN-NEXT: v_accvgpr_write_b32 a1, s1 +; GCN-NEXT: v_accvgpr_write_b32 a2, s2 +; GCN-NEXT: v_accvgpr_write_b32 a3, s3 +; GCN-NEXT: v_accvgpr_write_b32 a4, s4 +; GCN-NEXT: v_accvgpr_write_b32 a5, s5 +; GCN-NEXT: v_accvgpr_write_b32 a6, s6 +; GCN-NEXT: v_accvgpr_write_b32 a7, s7 +; GCN-NEXT: v_accvgpr_write_b32 a8, s8 +; GCN-NEXT: v_accvgpr_write_b32 a9, s9 +; GCN-NEXT: v_accvgpr_write_b32 a10, s10 +; GCN-NEXT: v_accvgpr_write_b32 a11, s11 +; GCN-NEXT: v_accvgpr_write_b32 a12, s12 +; GCN-NEXT: v_accvgpr_write_b32 a13, s13 +; GCN-NEXT: v_accvgpr_write_b32 a14, s14 +; GCN-NEXT: v_accvgpr_write_b32 a15, s15 +; GCN-NEXT: v_accvgpr_write_b32 a16, s16 +; GCN-NEXT: v_accvgpr_write_b32 a17, s17 +; GCN-NEXT: v_accvgpr_write_b32 a18, s18 +; GCN-NEXT: v_accvgpr_write_b32 a19, s19 +; GCN-NEXT: v_accvgpr_write_b32 a20, s20 +; GCN-NEXT: v_accvgpr_write_b32 a21, s21 +; GCN-NEXT: v_accvgpr_write_b32 a22, s22 +; GCN-NEXT: v_accvgpr_write_b32 a23, s23 +; GCN-NEXT: v_accvgpr_write_b32 a24, s24 +; GCN-NEXT: 
v_accvgpr_write_b32 a25, s25 +; GCN-NEXT: v_accvgpr_write_b32 a26, s26 +; GCN-NEXT: v_accvgpr_write_b32 a27, s27 +; GCN-NEXT: v_accvgpr_write_b32 a28, s28 +; GCN-NEXT: v_accvgpr_write_b32 a29, s29 +; GCN-NEXT: v_accvgpr_write_b32 a30, s30 +; GCN-NEXT: v_accvgpr_write_b32 a31, s31 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], a[0:31] cbsz:1 abid:2 blgp:3 ; GCN-NEXT: v_mov_b32_e32 v0, 0 @@ -121,38 +89,22 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v4, s0 -; GCN-NEXT: v_accvgpr_write_b32 a0, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s1 -; GCN-NEXT: v_accvgpr_write_b32 a1, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s2 -; GCN-NEXT: v_accvgpr_write_b32 a2, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s3 -; GCN-NEXT: v_accvgpr_write_b32 a3, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s4 -; GCN-NEXT: v_accvgpr_write_b32 a4, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s5 -; GCN-NEXT: v_accvgpr_write_b32 a5, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s6 -; GCN-NEXT: v_accvgpr_write_b32 a6, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s7 -; GCN-NEXT: v_accvgpr_write_b32 a7, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s8 -; GCN-NEXT: v_accvgpr_write_b32 a8, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s9 -; GCN-NEXT: v_accvgpr_write_b32 a9, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s10 -; GCN-NEXT: v_accvgpr_write_b32 a10, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s11 -; GCN-NEXT: v_accvgpr_write_b32 a11, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s12 -; GCN-NEXT: v_accvgpr_write_b32 a12, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s13 -; GCN-NEXT: v_accvgpr_write_b32 a13, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s14 -; GCN-NEXT: v_accvgpr_write_b32 a14, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s15 -; GCN-NEXT: v_accvgpr_write_b32 a15, v4 +; GCN-NEXT: v_accvgpr_write_b32 a0, s0 +; GCN-NEXT: v_accvgpr_write_b32 a1, s1 +; GCN-NEXT: v_accvgpr_write_b32 a2, s2 +; GCN-NEXT: v_accvgpr_write_b32 a3, s3 +; GCN-NEXT: v_accvgpr_write_b32 a4, s4 +; GCN-NEXT: v_accvgpr_write_b32 a5, s5 
+; GCN-NEXT: v_accvgpr_write_b32 a6, s6 +; GCN-NEXT: v_accvgpr_write_b32 a7, s7 +; GCN-NEXT: v_accvgpr_write_b32 a8, s8 +; GCN-NEXT: v_accvgpr_write_b32 a9, s9 +; GCN-NEXT: v_accvgpr_write_b32 a10, s10 +; GCN-NEXT: v_accvgpr_write_b32 a11, s11 +; GCN-NEXT: v_accvgpr_write_b32 a12, s12 +; GCN-NEXT: v_accvgpr_write_b32 a13, s13 +; GCN-NEXT: v_accvgpr_write_b32 a14, s14 +; GCN-NEXT: v_accvgpr_write_b32 a15, s15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3 ; GCN-NEXT: v_mov_b32_e32 v0, 0 @@ -183,14 +135,10 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v4, s0 -; GCN-NEXT: v_accvgpr_write_b32 a0, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s1 -; GCN-NEXT: v_accvgpr_write_b32 a1, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s2 -; GCN-NEXT: v_accvgpr_write_b32 a2, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s3 -; GCN-NEXT: v_accvgpr_write_b32 a3, v4 +; GCN-NEXT: v_accvgpr_write_b32 a0, s0 +; GCN-NEXT: v_accvgpr_write_b32 a1, s1 +; GCN-NEXT: v_accvgpr_write_b32 a2, s2 +; GCN-NEXT: v_accvgpr_write_b32 a3, s3 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_4x4x4bf16_1k a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3 ; GCN-NEXT: v_mov_b32_e32 v0, 0 @@ -217,38 +165,22 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v4, s0 -; GCN-NEXT: v_accvgpr_write_b32 a0, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s1 -; GCN-NEXT: v_accvgpr_write_b32 a1, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s2 -; GCN-NEXT: v_accvgpr_write_b32 a2, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s3 -; GCN-NEXT: v_accvgpr_write_b32 a3, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s4 -; GCN-NEXT: v_accvgpr_write_b32 a4, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s5 -; GCN-NEXT: v_accvgpr_write_b32 a5, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s6 -; GCN-NEXT: v_accvgpr_write_b32 a6, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s7 -; GCN-NEXT: 
v_accvgpr_write_b32 a7, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s8 -; GCN-NEXT: v_accvgpr_write_b32 a8, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s9 -; GCN-NEXT: v_accvgpr_write_b32 a9, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s10 -; GCN-NEXT: v_accvgpr_write_b32 a10, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s11 -; GCN-NEXT: v_accvgpr_write_b32 a11, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s12 -; GCN-NEXT: v_accvgpr_write_b32 a12, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s13 -; GCN-NEXT: v_accvgpr_write_b32 a13, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s14 -; GCN-NEXT: v_accvgpr_write_b32 a14, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s15 -; GCN-NEXT: v_accvgpr_write_b32 a15, v4 +; GCN-NEXT: v_accvgpr_write_b32 a0, s0 +; GCN-NEXT: v_accvgpr_write_b32 a1, s1 +; GCN-NEXT: v_accvgpr_write_b32 a2, s2 +; GCN-NEXT: v_accvgpr_write_b32 a3, s3 +; GCN-NEXT: v_accvgpr_write_b32 a4, s4 +; GCN-NEXT: v_accvgpr_write_b32 a5, s5 +; GCN-NEXT: v_accvgpr_write_b32 a6, s6 +; GCN-NEXT: v_accvgpr_write_b32 a7, s7 +; GCN-NEXT: v_accvgpr_write_b32 a8, s8 +; GCN-NEXT: v_accvgpr_write_b32 a9, s9 +; GCN-NEXT: v_accvgpr_write_b32 a10, s10 +; GCN-NEXT: v_accvgpr_write_b32 a11, s11 +; GCN-NEXT: v_accvgpr_write_b32 a12, s12 +; GCN-NEXT: v_accvgpr_write_b32 a13, s13 +; GCN-NEXT: v_accvgpr_write_b32 a14, s14 +; GCN-NEXT: v_accvgpr_write_b32 a15, s15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3 ; GCN-NEXT: v_mov_b32_e32 v0, 0 @@ -280,14 +212,10 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v4, s0 -; GCN-NEXT: v_accvgpr_write_b32 a0, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s1 -; GCN-NEXT: v_accvgpr_write_b32 a1, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s2 -; GCN-NEXT: v_accvgpr_write_b32 a2, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s3 -; GCN-NEXT: v_accvgpr_write_b32 a3, v4 +; GCN-NEXT: v_accvgpr_write_b32 a0, s0 +; GCN-NEXT: v_accvgpr_write_b32 a1, s1 +; GCN-NEXT: v_accvgpr_write_b32 a2, s2 +; GCN-NEXT: 
v_accvgpr_write_b32 a3, s3 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x16bf16_1k a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3 ; GCN-NEXT: v_mov_b32_e32 v0, 0 @@ -337,22 +265,14 @@ ; GCN-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0 ; GCN-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1] ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v4, s0 -; GCN-NEXT: v_accvgpr_write_b32 a0, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s1 -; GCN-NEXT: v_accvgpr_write_b32 a1, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s2 -; GCN-NEXT: v_accvgpr_write_b32 a2, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s3 -; GCN-NEXT: v_accvgpr_write_b32 a3, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s4 -; GCN-NEXT: v_accvgpr_write_b32 a4, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s5 -; GCN-NEXT: v_accvgpr_write_b32 a5, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s6 -; GCN-NEXT: v_accvgpr_write_b32 a6, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s7 -; GCN-NEXT: v_accvgpr_write_b32 a7, v4 +; GCN-NEXT: v_accvgpr_write_b32 a0, s0 +; GCN-NEXT: v_accvgpr_write_b32 a1, s1 +; GCN-NEXT: v_accvgpr_write_b32 a2, s2 +; GCN-NEXT: v_accvgpr_write_b32 a3, s3 +; GCN-NEXT: v_accvgpr_write_b32 a4, s4 +; GCN-NEXT: v_accvgpr_write_b32 a5, s5 +; GCN-NEXT: v_accvgpr_write_b32 a6, s6 +; GCN-NEXT: v_accvgpr_write_b32 a7, s7 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 blgp:3 ; GCN-NEXT: v_mov_b32_e32 v0, 0 @@ -399,28 +319,20 @@ ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x24 ; GCN-NEXT: s_mov_b64 s[4:5], 0 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GCN-NEXT: s_mov_b64 s[10:11], 1.0 ; GCN-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-NEXT: s_mov_b64 s[8:9], s[4:5] -; GCN-NEXT: v_mov_b32_e32 v4, s4 -; GCN-NEXT: v_accvgpr_write_b32 a0, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s5 -; GCN-NEXT: v_accvgpr_write_b32 a1, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s6 -; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GCN-NEXT: v_accvgpr_write_b32 a2, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s7 ; GCN-NEXT: s_waitcnt 
lgkmcnt(0) ; GCN-NEXT: v_pk_mov_b32 v[0:1], s[14:15], s[14:15] op_sel:[0,1] -; GCN-NEXT: v_accvgpr_write_b32 a3, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s8 -; GCN-NEXT: v_accvgpr_write_b32 a4, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s9 -; GCN-NEXT: v_accvgpr_write_b32 a5, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s10 -; GCN-NEXT: v_accvgpr_write_b32 a6, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s11 -; GCN-NEXT: v_accvgpr_write_b32 a7, v4 +; GCN-NEXT: s_mov_b64 s[8:9], s[4:5] +; GCN-NEXT: v_accvgpr_write_b32 a0, s4 +; GCN-NEXT: v_accvgpr_write_b32 a1, s5 +; GCN-NEXT: v_accvgpr_write_b32 a2, s6 +; GCN-NEXT: v_accvgpr_write_b32 a3, s7 +; GCN-NEXT: v_accvgpr_write_b32 a4, s8 +; GCN-NEXT: v_accvgpr_write_b32 a5, s9 +; GCN-NEXT: v_accvgpr_write_b32 a6, s10 +; GCN-NEXT: v_accvgpr_write_b32 a7, s11 ; GCN-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] @@ -442,29 +354,21 @@ ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x24 ; GCN-NEXT: s_mov_b32 s4, 0 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GCN-NEXT: s_mov_b32 s5, 0x405ec000 ; GCN-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-NEXT: s_mov_b64 s[8:9], s[4:5] -; GCN-NEXT: s_mov_b64 s[10:11], s[4:5] -; GCN-NEXT: v_mov_b32_e32 v4, s4 -; GCN-NEXT: v_accvgpr_write_b32 a0, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s5 -; GCN-NEXT: v_accvgpr_write_b32 a1, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s6 -; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GCN-NEXT: v_accvgpr_write_b32 a2, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s7 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_pk_mov_b32 v[0:1], s[14:15], s[14:15] op_sel:[0,1] -; GCN-NEXT: v_accvgpr_write_b32 a3, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s8 -; GCN-NEXT: v_accvgpr_write_b32 a4, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s9 -; GCN-NEXT: v_accvgpr_write_b32 a5, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s10 -; GCN-NEXT: v_accvgpr_write_b32 a6, v4 -; GCN-NEXT: v_mov_b32_e32 v4, s11 -; GCN-NEXT: v_accvgpr_write_b32 a7, v4 +; GCN-NEXT: 
s_mov_b64 s[8:9], s[4:5] +; GCN-NEXT: s_mov_b64 s[10:11], s[4:5] +; GCN-NEXT: v_accvgpr_write_b32 a0, s4 +; GCN-NEXT: v_accvgpr_write_b32 a1, s5 +; GCN-NEXT: v_accvgpr_write_b32 a2, s6 +; GCN-NEXT: v_accvgpr_write_b32 a3, s7 +; GCN-NEXT: v_accvgpr_write_b32 a4, s8 +; GCN-NEXT: v_accvgpr_write_b32 a5, s9 +; GCN-NEXT: v_accvgpr_write_b32 a6, s10 +; GCN-NEXT: v_accvgpr_write_b32 a7, s11 ; GCN-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir --- a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir @@ -531,8 +531,7 @@ ; GFX90A-LABEL: name: s_to_a ; GFX90A: liveins: $sgpr0 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec - ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $sgpr0, implicit $exec, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0 ; GFX940-LABEL: name: s_to_a ; GFX940: liveins: $sgpr0 @@ -560,10 +559,8 @@ ; GFX90A-LABEL: name: s2_to_a2 ; GFX90A: liveins: $sgpr0_sgpr1 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1 - ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1 - ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $sgpr0_sgpr1 + ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1 ; GFX940-LABEL: name: 
s2_to_a2 ; GFX940: liveins: $sgpr0_sgpr1 @@ -594,12 +591,9 @@ ; GFX90A-LABEL: name: s3_to_a3 ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 - ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 - ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2 - ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $sgpr0_sgpr1_sgpr2 + ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 + ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 ; GFX940-LABEL: name: s3_to_a3 ; GFX940: liveins: $sgpr0_sgpr1_sgpr2 @@ -633,14 +627,10 @@ ; GFX90A-LABEL: name: s4_to_a4 ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit killed 
$sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX940-LABEL: name: s4_to_a4 ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 @@ -679,18 +669,12 @@ ; GFX90A-LABEL: name: s6_to_a6 ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit killed 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX940-LABEL: name: s6_to_a6 ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 @@ -735,22 +719,14 @@ ; GFX90A-LABEL: name: s8_to_a8 ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX90A-NEXT: $agpr7 = 
V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX940-LABEL: name: s8_to_a8 ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 @@ -813,38 +789,22 @@ ; GFX90A-LABEL: name: s16_to_a16 ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: 
$vgpr0 = V_MOV_B32_e32 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $sgpr11, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec ; 
GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX940-LABEL: name: s16_to_a16 ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 @@ -1261,14 +1221,10 @@ ; GFX90A: liveins: $agpr0, $sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1 - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX940-LABEL: name: copy_sgpr_to_agpr_tuple ; GFX940: liveins: $agpr0, $sgpr2_sgpr3 @@ -1307,14 +1263,10 @@ ; GFX90A: liveins: $agpr0, $sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: S_NOP 0, implicit-def dead 
$sgpr0_sgpr1 - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7 - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec - ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 ; GFX940-LABEL: name: copy_sgpr_to_agpr_tuple_kill ; GFX940: liveins: $agpr0, $sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir b/llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-sgpr-no-vgprs.mir @@ -1,8 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX908 %s -# RUN: not --crash llc -march=amdgcn -mcpu=gfx90a -run-pass=postrapseudos -verify-machineinstrs 
-o /dev/null %s 2>&1 | FileCheck %s - -# CHECK: LLVM ERROR: Error while trying to spill VGPR0 from class VGPR_32: Cannot scavenge register without an emergency spill slot! +# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX90A %s --- name: no_free_vgprs_for_copy_s32_to_a32 @@ -17,6 +15,11 @@ ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr8, implicit $exec ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr1 + ; GFX90A-LABEL: name: no_free_vgprs_for_copy_s32_to_a32 + ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec + ; GFX90A-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr1 $agpr1 = COPY $sgpr8 S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr1 ... @@ -36,6 +39,12 @@ ; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9 ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr2_agpr3 + ; GFX90A-LABEL: name: no_free_vgprs_for_copy_s64_to_a64 + ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8_sgpr9 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec, implicit-def $agpr2_agpr3, implicit $sgpr8_sgpr9 + ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9 + ; GFX90A-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr2_agpr3 $agpr2_agpr3 = COPY $sgpr8_sgpr9 S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr2_agpr3 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX908 %s +; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX908 %s ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16>, <2 x i16>, <32 x float>, i32, i32, i32) @@ -14,38 +14,39 @@ ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN-DAG: s_load_dwordx16 ; GCN-DAG: s_load_dwordx16 -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 
v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 
v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GCN: v_mfma_f32_32x32x2bf16 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-32: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 @@ -65,7 +66,8 @@ ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: s_load_dwordx16 -; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GCN: v_mfma_f32_16x16x2bf16 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 @@ -85,7 +87,8 @@ ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: s_load_dwordx4 -; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GCN: 
v_mfma_f32_4x4x2bf16 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 @@ -105,7 +108,8 @@ ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: s_load_dwordx16 -; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GCN: v_mfma_f32_32x32x4bf16 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 @@ -125,7 +129,8 @@ ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: s_load_dwordx4 -; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GCN: v_mfma_f32_16x16x8bf16 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll @@ -13,43 +13,9 @@ ; GCN-LABEL: {{^}}test_mfma_f32_32x32x4bf16_1k: ; GCN-DAG: s_load_dwordx16 ; GCN-DAG: s_load_dwordx16 -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32
a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX940-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX90A-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GFX90A-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX90A: v_mfma_f32_32x32x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] 
cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_f32_32x32x4_2b_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 @@ -65,15 +31,14 @@ } ; GCN-LABEL: {{^}}test_mfma_f32_16x16x4bf16_1k: -; GCN-DAG: s_load_dwordx16 -; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GCN-NOT: v_accvgpr_read_b32 -; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] +; GCN-DAG: s_load_dwordx16 +; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX940: v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GCN-NOT: v_accvgpr_read_b32 +; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1)* %arg) #0 { bb: %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg @@ -85,15 +50,14 @@ } ; GCN-LABEL: {{^}}test_mfma_f32_4x4x4bf16_1k: -; GCN-DAG: s_load_dwordx4 -; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX90A: 
v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GCN-NOT: v_accvgpr_read_b32 -; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]], +; GCN-DAG: s_load_dwordx4 +; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A: v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX940: v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GCN-NOT: v_accvgpr_read_b32 +; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]], define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* %arg) #0 { bb: %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg @@ -105,15 +69,14 @@ } ; GCN-LABEL: {{^}}test_mfma_f32_32x32x8bf16_1k: -; GCN-DAG: s_load_dwordx16 -; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_32x32x8_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GCN-NOT: v_accvgpr_read_b32 -; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] +; GCN-DAG: s_load_dwordx16 +; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, 
s{{[0-9]+}} +; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX940: v_mfma_f32_32x32x8_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GCN-NOT: v_accvgpr_read_b32 +; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1)* %arg) #0 { bb: %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg @@ -125,15 +88,14 @@ } ; GCN-LABEL: {{^}}test_mfma_f32_16x16x16bf16_1k: -; GCN-DAG: s_load_dwordx4 -; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GCN-NOT: v_accvgpr_read_b32 -; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]], +; GCN-DAG: s_load_dwordx4 +; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX940: v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GCN-NOT: v_accvgpr_read_b32 +; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]], define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1)* %arg) #0 { bb: %in.1 = load <4 x float>, <4 x float> 
addrspace(1)* %arg @@ -200,13 +162,12 @@ } ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_lit: -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} -; GFX90A-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x405ec000 -; GFX940-DAG: s_mov_b32 s{{[0-9]+}}, 0x405ec000 -; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}} -; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}} -; GCN: global_store_dwordx4 -; GCN: global_store_dwordx4 +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} +; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 0x405ec000 +; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}} +; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}} +; GCN: global_store_dwordx4 +; GCN: global_store_dwordx4 define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 { bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> , i32 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX908 %s +; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope 
--check-prefixes=GCN,GFX908 %s ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32, i32, <16 x i32>, i32, i32, i32) @@ -9,7 +9,8 @@ ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: s_load_dwordx16 -; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GCN: v_mfma_i32_32x32x8i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 @@ -27,7 +28,8 @@ ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: s_load_dwordx4 -; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GCN: v_mfma_i32_16x16x16i8 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll @@ -23,39 +23,39 @@ ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 ; GCN-DAG: s_load_dwordx16 ; GCN-DAG: s_load_dwordx16 -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 
v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-32:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; 
GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A_40-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 @@ -80,8 +80,8 @@ ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0 ; GCN-DAG: v_mov_b32_e32 
[[ONE:v[0-9]+]], 1.0 ; GCN: s_load_dwordx16 -; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT: v_accvgpr_read_b32 @@ -100,8 +100,8 @@ ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 ; GCN: s_load_dwordx4 -; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_4x4x1f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_f32_4x4x1_16b_f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 @@ -120,8 +120,8 @@ ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 ; GCN: s_load_dwordx16 -; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_32x32x2f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_f32_32x32x2_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 @@ -140,8 +140,8 @@ ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0 ; GCN-DAG: 
v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 ; GCN: s_load_dwordx4 -; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_16x16x4f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_f32_16x16x4_f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 @@ -159,8 +159,8 @@ ; GCN-LABEL: {{^}}test_mfma_f32_32x32x4f16: ; GCN-DAG: s_load_dwordx16 ; GCN-DAG: s_load_dwordx16 -; GFX908_A-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX908-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A_40-COUNT-32:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_32x32x4f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_f32_32x32x4_2b_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-32: v_accvgpr_read_b32 @@ -180,8 +180,8 @@ ; GCN-LABEL: {{^}}test_mfma_f32_16x16x4f16: ; GCN: s_load_dwordx16 -; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_16x16x4f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_f32_16x16x4_4b_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 @@ 
-202,8 +202,8 @@ ; GCN-LABEL: {{^}}test_mfma_f32_4x4x4f16: ; GCN: s_load_dwordx4 ; GCN: s_load_dwordx4 -; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_4x4x4f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_f32_4x4x4_16b_f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 @@ -225,8 +225,8 @@ ; GCN: s_load_dwordx16 ; GCN: s_waitcnt lgkmcnt(0) ; GFX908_A: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} -; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_f32_32x32x8_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 @@ -247,8 +247,8 @@ ; GCN-LABEL: {{^}}test_mfma_f32_16x16x16f16: ; GCN: s_load_dwordx4 ; GCN: s_load_dwordx4 -; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_16x16x16f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_f32_16x16x16_f16 
[[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 @@ -271,39 +271,39 @@ ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN-DAG: s_load_dwordx16 ; GCN-DAG: s_load_dwordx16 -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 
v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 
v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A_40-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_i32_32x32x4i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_i32_32x32x4_2b_i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-32: v_accvgpr_read_b32 @@ -322,8 +322,8 @@ ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: s_load_dwordx16 -; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_i32_16x16x4i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_i32_16x16x4_4b_i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 @@ -342,8 +342,8 @@ ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: s_load_dwordx4 -; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_i32_4x4x4i8 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX940: v_mfma_i32_4x4x4_16b_i8 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 @@ -594,8 +594,8 @@ } ; GCN-LABEL: {{^}}test_mfma_f32_4x4x1f32_lit_splat: -; 
GFX908_A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000 -; GFX940: s_mov_b32 [[TMP:s[0-9]+]], 0x42f60000 +; GFX908: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000 +; GFX90A_40: s_mov_b32 [[TMP:s[0-9]+]], 0x42f60000 ; GCN: v_accvgpr_write_b32 [[TTMPA:a[0-9]+]], [[TMP]] ; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] ; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] @@ -620,8 +620,8 @@ } ; GCN-LABEL: {{^}}test_mfma_f32_4x4x1f32_lit_splat_bad_code: -; GFX908_A: v_mov_b32_e32 [[TMP0:v[0-9]+]], 0x42f60000 -; GFX940: s_mov_b32 [[TMP0:s[0-9]+]], 0x42f60000 +; GFX908: v_mov_b32_e32 [[TMP0:v[0-9]+]], 0x42f60000 +; GFX90A_40:s_mov_b32 [[TMP0:s[0-9]+]], 0x42f60000 ; GCN: v_accvgpr_write_b32 [[AGPR:a[0-9]+]], [[TMP0]] ; GFX90A_40-COUNT-3: v_accvgpr_mov_b32 a{{[0-9]+}}, [[AGPR]] ; GFX908-NEXT: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP0]] diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll --- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX908,GFX908_A %s -; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,GFX908_A %s -; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX940 %s +; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,GFX908_A,GFX940_A %s +; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX940,GFX940_A %s ; GCN-LABEL: {{^}}test_mfma_loop_zeroinit: @@ -47,8 +47,8 @@ ; 3 vgprs are needed to avoid wait states between writes. ; Check that we do not use 32 temp sgprs as well. 
-; GFX908_A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000 -; GFX940: s_mov_b32 [[TMP:s[0-9]+]], 0x42f60000 +; GFX908: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000 +; GFX940_A: s_mov_b32 [[TMP:s[0-9]+]], 0x42f60000 ; GFX908-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] ; GFX90A: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], [[TMP]] ; GFX90A-COUNT-31: v_accvgpr_mov_b32 a{{[0-9]+}}, [[LEAD]] @@ -188,73 +188,8 @@ ; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]] ; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 
0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 
0x4{{[0-9a-f]+}} -; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] - -; GFX940-COUNT-32: s_mov_b32 s{{[0-9]+}}, 0x4{{[0-9a-f]+}} -; GFX940-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX940_A-COUNT-32: s_mov_b32 s{{[0-9]+}}, 0x4{{[0-9a-f]+}} +; GFX940_A-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GCN: [[LOOP:.LBB[0-9_]+]]: ; GCN-NOT: v_accvgpr @@ -355,10 +290,9 @@ ; GCN-LABEL: {{^}}test_mfma_loop_sgpr_init: -; GFX908_A: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}} +; GFX908: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}} ; GFX908-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX90A: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], [[TMP]] -; GFX940: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], s{{[0-9]+}} +; GFX940_A: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], s{{[0-9]+}} ; GFX90A-COUNT-31: v_accvgpr_mov_b32 a{{[0-9]+}}, [[LEAD]] ; GCN: [[LOOP:.LBB[0-9_]+]]: @@ -426,8 +360,8 @@ ; GCN-LABEL: {{^}}test_mfma_loop_mixed_init: ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v0 -; GFX908_A-DAG: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}} -; GFX940-DAG: s_load_dword [[TMP:s[0-9]+]], +; GFX908-DAG: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}} +; GFX940_A-DAG: s_load_dword [[TMP:s[0-9]+]], ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] ; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} ; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} diff --git a/llvm/test/MC/AMDGPU/mai-gfx90a.s b/llvm/test/MC/AMDGPU/mai-gfx90a.s --- a/llvm/test/MC/AMDGPU/mai-gfx90a.s +++ b/llvm/test/MC/AMDGPU/mai-gfx90a.s @@ -27,6 +27,9 @@ v_accvgpr_mov_b32 a1, a2 // GFX90A: v_accvgpr_mov_b32 a1, a2 ; encoding: [0x02,0xa5,0x02,0x7e] +v_accvgpr_write_b32 a10, s20 +// GFX90A: v_accvgpr_write_b32 a10, s20 ; encoding: [0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18] + v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[34:65] // GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[34:65] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x8a,0x04] diff --git a/llvm/test/MC/AMDGPU/mai-gfx940.s 
b/llvm/test/MC/AMDGPU/mai-gfx940.s --- a/llvm/test/MC/AMDGPU/mai-gfx940.s +++ b/llvm/test/MC/AMDGPU/mai-gfx940.s @@ -3,7 +3,6 @@ v_accvgpr_write_b32 a10, s20 // GFX940: v_accvgpr_write_b32 a10, s20 ; encoding: [0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18] -// GFX90A: error: source operand must be either a VGPR or an inline constant v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] // GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x14] diff --git a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx90a.txt b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx90a.txt --- a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx90a.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx90a.txt @@ -21,6 +21,9 @@ # GFX90A: v_accvgpr_mov_b32 a1, a2 ; encoding: [0x02,0xa5,0x02,0x7e] 0x02,0xa5,0x02,0x7e +# GFX90A: v_accvgpr_write_b32 a10, s20 ; encoding: [0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18] +0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18 + # GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[2:33] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x0a,0x04] 0x00,0x80,0xc0,0xd3,0x00,0x03,0x0a,0x04