Index: lib/Target/AMDGPU/AMDGPUCallLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -37,7 +37,7 @@ if (Val) return false; - MIRBuilder.buildInstr(AMDGPU::S_ENDPGM); + MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0); return true; } Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -174,6 +174,7 @@ ImmTyNegHi, ImmTySwizzle, ImmTyGprIdxMode, + ImmTyEndpgm, ImmTyHigh }; @@ -517,6 +518,7 @@ bool isGPRIdxMode() const; bool isS16Imm() const; bool isU16Imm() const; + bool isEndpgm() const; StringRef getExpressionAsToken() const { assert(isExpr()); @@ -706,6 +708,9 @@ case ImmTySwizzle: OS << "Swizzle"; break; case ImmTyGprIdxMode: OS << "GprIdxMode"; break; case ImmTyHigh: OS << "High"; break; + case ImmTyEndpgm: + OS << "Endpgm"; + break; } } @@ -1187,6 +1192,9 @@ void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); void cvtSDWA(MCInst &Inst, const OperandVector &Operands, uint64_t BasicInstType, bool skipVcc = false); + + OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); + AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; }; struct OptionalOperand { @@ -5522,6 +5530,10 @@ return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); } +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); +} + AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); } @@ -5798,3 +5810,28 @@ return Match_InvalidOperand; } } + +//===----------------------------------------------------------------------===// +// endpgm 
+//===----------------------------------------------------------------------===// + +OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + int64_t Imm = 0; + + if (!parseExpr(Imm)) { + // The operand is optional, if not present default to 0 + Imm = 0; + } + + if (!isUInt<16>(Imm)) { + Error(S, "expected a 16-bit value"); + return MatchOperand_ParseFail; + } + + Operands.push_back( + AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); + return MatchOperand_Success; +} + +bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -213,6 +213,8 @@ const MCSubtargetInfo &STI, raw_ostream &O); void printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printEndpgm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); }; class R600InstPrinter : public MCInstPrinter { Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -1209,6 +1209,17 @@ O << ')'; } +void AMDGPUInstPrinter::printEndpgm(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + uint16_t Imm = MI->getOperand(OpNo).getImm(); + if (Imm == 0) { + return; + } + + O << formatDec(Imm); +} + #include "AMDGPUGenAsmWriter.inc" void R600InstPrinter::printInst(const MCInst *MI, raw_ostream &O, Index: lib/Target/AMDGPU/SIInsertSkips.cpp =================================================================== --- lib/Target/AMDGPU/SIInsertSkips.cpp +++ lib/Target/AMDGPU/SIInsertSkips.cpp @@ -176,7 +176,7 @@ .addImm(0); // 
en // ... and terminate wavefront. - BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); + BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0); return true; } Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1137,9 +1137,13 @@ if (MBB.succ_empty()) { bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end(); - if (HasNoTerminator) - BuildMI(MBB, MBB.end(), DebugLoc(), - get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG)); + if (HasNoTerminator) { + if (Info->returnsVoid()) { + BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::S_ENDPGM)).addImm(0); + } else { + BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::SI_RETURN_TO_EPILOG)); + } + } } } Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -668,6 +668,14 @@ let IsOptional = 1; } +def EndpgmMatchClass : AsmOperandClass { + let Name = "EndpgmImm"; + let PredicateMethod = "isEndpgm"; + let ParserMethod = "parseEndpgmOp"; + let RenderMethod = "addImmOperands"; + let IsOptional = 1; +} + def ExpTgtMatchClass : AsmOperandClass { let Name = "ExpTgt"; let PredicateMethod = "isExpTgt"; @@ -685,6 +693,11 @@ let ParserMatchClass = SwizzleMatchClass; } +def EndpgmImm : Operand<i16> { + let PrintMethod = "printEndpgm"; + let ParserMatchClass = EndpgmMatchClass; +} + def SWaitMatchClass : AsmOperandClass { let Name = "SWaitCnt"; let RenderMethod = "addImmOperands"; Index: lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp =================================================================== --- lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -247,9 +247,10 @@ // Skip this if the endpgm has any implicit uses, otherwise we would need // to be careful to update / remove 
them. + // S_ENDPGM always has a single imm operand that is not used other than to + // end up in the encoding MachineInstr &Term = MBB.back(); - if (Term.getOpcode() != AMDGPU::S_ENDPGM || - Term.getNumOperands() != 0) + if (Term.getOpcode() != AMDGPU::S_ENDPGM || Term.getNumOperands() != 1) continue; SmallVector<MachineBasicBlock*, 4> Blocks({&MBB}); Index: lib/Target/AMDGPU/SOPInstructions.td =================================================================== --- lib/Target/AMDGPU/SOPInstructions.td +++ lib/Target/AMDGPU/SOPInstructions.td @@ -866,9 +866,7 @@ let isTerminator = 1 in { -def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm", - [(AMDGPUendpgm)]> { - let simm16 = 0; +def S_ENDPGM : SOPP <0x00000001, (ins EndpgmImm:$simm16), "s_endpgm $simm16"> { let isBarrier = 1; let isReturn = 1; } @@ -1044,6 +1042,11 @@ // SOP1 Patterns //===----------------------------------------------------------------------===// +def : GCNPat < + (AMDGPUendpgm), + (S_ENDPGM (i16 0)) +>; + def : GCNPat < + (i64 (ctpop i64:$src)), + (i64 (REG_SEQUENCE SReg_64, Index: test/CodeGen/AMDGPU/GlobalISel/legalize-block-addr.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-block-addr.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-block-addr.mir @@ -21,8 +21,8 @@ bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_blockaddress ; CHECK: [[BLOCK_ADDR:%[0-9]+]]:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) - ; CHECK: S_ENDPGM implicit [[BLOCK_ADDR]](p0) + ; CHECK: S_ENDPGM 0, implicit [[BLOCK_ADDR]](p0) %0:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) - S_ENDPGM implicit %0 + S_ENDPGM 0, implicit %0 ... 
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir @@ -32,9 +32,9 @@ ; CHECK-LABEL: name: test_constant_s1 ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; CHECK: S_ENDPGM implicit [[C]](s1) + ; CHECK: S_ENDPGM 0, implicit [[C]](s1) %1:_(s1) = G_CONSTANT i1 0 - S_ENDPGM implicit %1 + S_ENDPGM 0, implicit %1 ... --- Index: test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir +++ test/CodeGen/AMDGPU/GlobalISel/regbankselect-block-addr.mir @@ -22,8 +22,8 @@ bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_blockaddress ; CHECK: [[BLOCK_ADDR:%[0-9]+]]:sgpr(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) - ; CHECK: S_ENDPGM implicit [[BLOCK_ADDR]](p0) + ; CHECK: S_ENDPGM 0, implicit [[BLOCK_ADDR]](p0) %0:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) - S_ENDPGM implicit %0 + S_ENDPGM 0, implicit %0 ... Index: test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir +++ test/CodeGen/AMDGPU/GlobalISel/regbankselect-merge-values.mir @@ -14,12 +14,12 @@ ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(s32) = G_EXTRACT [[COPY]](s64), 0 ; CHECK: [[EXTRACT1:%[0-9]+]]:sgpr(s32) = G_EXTRACT [[COPY]](s64), 32 ; CHECK: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[EXTRACT]](s32), [[EXTRACT1]](s32) - ; CHECK: S_ENDPGM implicit [[MV]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s64) = COPY $sgpr0_sgpr1 %1:_(s32) = G_EXTRACT %0, 0 %2:_(s32) = G_EXTRACT %0, 32 %3:_(s64) = G_MERGE_VALUES %1, %2 - S_ENDPGM implicit %3 + S_ENDPGM 0, implicit %3 ... 
--- @@ -34,11 +34,11 @@ ; CHECK: [[EXTRACT:%[0-9]+]]:vgpr(s32) = G_EXTRACT [[COPY]](s64), 0 ; CHECK: [[EXTRACT1:%[0-9]+]]:vgpr(s32) = G_EXTRACT [[COPY]](s64), 32 ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[EXTRACT]](s32), [[EXTRACT1]](s32) - ; CHECK: S_ENDPGM implicit [[MV]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_EXTRACT %0, 0 %2:_(s32) = G_EXTRACT %0, 32 %3:_(s64) = G_MERGE_VALUES %1, %2 - S_ENDPGM implicit %3 + S_ENDPGM 0, implicit %3 ... Index: test/CodeGen/AMDGPU/break-smem-soft-clauses.mir =================================================================== --- test/CodeGen/AMDGPU/break-smem-soft-clauses.mir +++ test/CodeGen/AMDGPU/break-smem-soft-clauses.mir @@ -9,9 +9,9 @@ bb.0: ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # Trivial clause at beginning of program @@ -22,10 +22,10 @@ ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2 ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # Trivial clause at beginning of program @@ -37,11 +37,11 @@ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0 $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... 
--- # Trivial clause at beginning of program @@ -54,12 +54,12 @@ ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0 $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # Reuse of same input pointer is OK @@ -69,10 +69,10 @@ ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # 32-bit load partially clobbers its own ptr reg @@ -82,9 +82,9 @@ bb.0: ; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # 32-bit load partially clobbers its own ptr reg @@ -94,9 +94,9 @@ bb.0: ; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # 64-bit load clobbers its own ptr reg @@ -106,9 +106,9 @@ bb.0: ; GCN-LABEL: name: smrd_load8_overwrite_ptr ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # vmcnt has 4 bits, so maximum 16 outstanding loads. 
The waitcnt @@ -137,7 +137,7 @@ ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0 ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 @@ -160,7 +160,7 @@ $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0 $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28 - S_ENDPGM + S_ENDPGM 0 ... --- @@ -172,10 +172,10 @@ ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- @@ -186,10 +186,10 @@ ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -201,10 +201,10 @@ ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- @@ -215,10 +215,10 @@ ; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- @@ -232,13 +232,13 @@ ; GCN: bb.1: ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 bb.0: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 bb.1: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # The load clobbers the pointer of the store, so it needs to break. @@ -250,10 +250,10 @@ ; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4 ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0 ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0 $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # The load clobbers the data of the store, so it needs to break. 
@@ -266,10 +266,10 @@ ; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4 ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0 ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0 $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # Regular VALU instruction breaks clause, no nop needed @@ -281,11 +281,11 @@ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $vgpr8 = V_MOV_B32_e32 0, implicit $exec $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # Regular SALU instruction breaks clause, no nop needed @@ -297,11 +297,11 @@ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 ; GCN-NEXT: $sgpr8 = S_MOV_B32 0 ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $sgpr8 = S_MOV_B32 0 $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- name: ds_inst_breaks_smem_clause @@ -312,11 +312,11 @@ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -328,11 +328,11 @@ ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # FIXME: Should this be handled? @@ -344,8 +344,8 @@ ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13 ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13 $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0 - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir =================================================================== --- test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir +++ test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir @@ -8,10 +8,10 @@ bb.0: ; GCN-LABEL: name: trivial_clause_load_flat4_x1 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... 
--- # Trivial clause at beginning of program @@ -22,11 +22,11 @@ ; GCN-LABEL: name: trivial_clause_load_flat4_x2 ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- # Trivial clause at beginning of program @@ -38,12 +38,12 @@ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- # Trivial clause at beginning of program @@ -56,13 +56,13 @@ ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... 
--- # Reuse of same input pointer is OK @@ -73,11 +73,11 @@ ; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- # 32-bit load partially clobbers its own ptr reg @@ -87,10 +87,10 @@ bb.0: ; GCN-LABEL: name: flat_load4_overwrite_ptr_lo ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- # 32-bit load partially clobbers its own ptr reg @@ -100,10 +100,10 @@ bb.0: ; GCN-LABEL: name: flat_load4_overwrite_ptr_hi ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- # 64-bit load clobbers its own ptr reg @@ -113,10 +113,10 @@ bb.0: ; GCN-LABEL: name: flat_load8_overwrite_ptr ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- # vmcnt has 4 bits, so maximum 16 outstanding loads. 
The waitcnt @@ -147,7 +147,7 @@ ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18 - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr @@ -171,7 +171,7 @@ $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18 - S_ENDPGM + S_ENDPGM 0 ... --- @@ -183,11 +183,11 @@ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -199,11 +199,11 @@ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- @@ -215,11 +215,11 @@ ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- @@ -232,10 +232,10 @@ ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- @@ -253,14 +253,14 @@ ; GCN: bb.1: ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 bb.0: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr bb.1: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... 
--- # The load clobbers the pointer of the store, so it needs to break. @@ -272,11 +272,11 @@ ; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4 ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- # The load clobbers the data of the store, so it needs to break. @@ -289,11 +289,11 @@ ; GCN-LABEL: name: break_clause_store_load_into_data_flat4 ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- # Regular VALU instruction breaks clause, no nop needed @@ -307,12 +307,12 @@ ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr8 = V_MOV_B32_e32 0, implicit $exec $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... 
--- # Regular SALU instruction breaks clause, no nop needed @@ -326,12 +326,12 @@ ; GCN-NEXT: $sgpr8 = S_MOV_B32 0 ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr $sgpr8 = S_MOV_B32 0 $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- @@ -344,12 +344,12 @@ ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- @@ -361,12 +361,12 @@ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- # FIXME: Should this be handled? 
@@ -378,11 +378,11 @@ ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5 ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5 $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- name: trivial_clause_load_mubuf4_x2 @@ -392,11 +392,11 @@ ; GCN-LABEL: name: trivial_clause_load_mubuf4_x2 ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- name: break_clause_simple_load_mubuf_offen_ptr @@ -407,11 +407,11 @@ ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # BUFFER instructions overwriting their own inputs is supposedly OK. 
@@ -424,11 +424,11 @@ ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec $vgpr1 = V_MOV_B32_e32 0, implicit $exec $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # Break a clause from interference between mubuf and flat instructions @@ -441,11 +441,11 @@ ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... # Break a clause from interference between mubuf and flat instructions @@ -454,7 +454,7 @@ # GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 # XNACK-NEXT: S_NOP 0 # GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3 -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: break_clause_mubuf_load_flat_load body: | @@ -462,7 +462,7 @@ $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -474,11 +474,11 @@ ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- name: break_clause_atomic_nortn_ptr_load_flat4 @@ -488,11 +488,11 @@ ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4 ; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- @@ -504,11 +504,11 @@ ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -519,11 +519,11 @@ ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4 ; GCN: BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # Make sure there is no assert on mubuf instructions which do not have @@ -535,10 +535,10 @@ ; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_ENDPGM + ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # Loads and stores using different addresses theoretically does not @@ -557,7 +557,7 @@ $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- # Loads and stores using the same address needs a nop. @@ -576,5 +576,5 @@ $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/clamp-omod-special-case.mir =================================================================== --- test/CodeGen/AMDGPU/clamp-omod-special-case.mir +++ test/CodeGen/AMDGPU/clamp-omod-special-case.mir @@ -59,7 +59,7 @@ %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $exec BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -121,7 +121,7 @@ %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $exec BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # Don't fold a mul that looks like an omod if itself has omod set @@ -184,7 +184,7 @@ %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $exec BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -249,7 +249,7 @@ %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $exec BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... @@ -326,7 +326,7 @@ %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $exec BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -391,7 +391,7 @@ %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $exec BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... 
--- Index: test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir =================================================================== --- test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir +++ test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir @@ -26,6 +26,6 @@ $vgpr1 = V_ADDC_U32_e32 $vgpr3, killed $vgpr6, implicit-def dead $vcc, implicit $vcc, implicit $exec FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir =================================================================== --- test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir +++ test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir @@ -31,7 +31,7 @@ %15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec %16:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %15, implicit $exec BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4) - S_ENDPGM + S_ENDPGM 0 bb.2: successors: %bb.3, %bb.4 Index: test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir =================================================================== --- test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir +++ test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir @@ -129,5 +129,5 @@ %29:vgpr_32 = V_ADD_F32_e32 0, killed %28, implicit $exec $m0 = S_MOV_B32 -1 DS_WRITE_B32 undef %30:vgpr_32, killed %29, 0, 0, implicit $m0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`, addrspace 3) - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir =================================================================== --- test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir +++ test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir @@ -264,5 +264,5 @@ %115:vgpr_32 = V_MAX_F32_e32 0, killed %114, implicit $exec %116:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, killed %115, 0, 1065353216, 0, 0, implicit $exec EXP 0, undef %117:vgpr_32, killed %116, undef %118:vgpr_32, undef %119:vgpr_32, -1, -1, 15, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir =================================================================== --- test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir +++ test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir @@ -156,7 +156,7 @@ %54:vreg_128 = COPY killed %38 %55:vgpr_32 = V_FMA_F32 0, killed %54.sub1, 0, target-flags(amdgpu-gotprel32-lo) 1056964608, 0, 1056964608, 0, 0, implicit $exec EXP 1, undef %56:vgpr_32, killed %55, undef %57:vgpr_32, undef %58:vgpr_32, -1, 0, 15, implicit $exec - S_ENDPGM + S_ENDPGM 0 bb.18: successors: %bb.7(0x80000000) Index: test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir =================================================================== --- test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir +++ test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir @@ -180,5 +180,5 @@ successors: %bb.21(0x80000000) bb.21: - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir =================================================================== --- test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir +++ test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir @@ -93,6 +93,6 @@ undef %32.sub0:vreg_128 = COPY killed %31.sub0 %32.sub2:vreg_128 = COPY %33 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %32:vreg_128 - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir =================================================================== --- test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir +++ test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir @@ -55,7 +55,7 @@ %9 = S_AND_B32 killed %7, killed %8, implicit-def dead $scc %10 = COPY %9 BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -158,7 +158,7 @@ %31 = V_AND_B32_e64 %34, %34, implicit $exec FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- @@ -220,7 +220,7 @@ %12 = S_LSHL_B32 killed %5, 12, implicit-def dead $scc %13 = COPY %12 BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -361,7 +361,7 @@ %28 = V_LSHL_B32_e32 %27, %6, implicit $exec FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- @@ -420,7 +420,7 @@ %12 = S_ASHR_I32 killed %5, 12, implicit-def dead $scc %13 = COPY %12 BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... @@ -569,7 +569,7 @@ %28 = V_ASHR_I32_e32 %27, %35, implicit $exec FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- @@ -628,7 +628,7 @@ %12 = S_LSHR_B32 killed %5, 12, implicit-def dead $scc %13 = COPY %12 BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -778,7 +778,7 @@ %28 = V_LSHR_B32_e32 %27, %35, implicit $exec FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -788,7 +788,7 @@ # GCN-LABEL: name: undefined_vreg_operand{{$}} # GCN: bb.0 # GCN-NEXT: FLAT_STORE_DWORD undef %3:vreg_64, undef %1:vgpr_32, -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: undefined_vreg_operand tracksRegLiveness: true registers: @@ -801,7 +801,7 @@ %0 = V_MOV_B32_e32 0, implicit $exec %2 = V_XOR_B32_e64 killed %0, undef %1, implicit $exec FLAT_STORE_DWORD undef %3, %2, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- @@ -828,13 +828,13 @@ bb.3: liveins: $vcc SI_END_CF %0, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - S_ENDPGM implicit $vcc + S_ENDPGM 0, implicit $vcc ... --- # GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_reg{{$}} # GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec -# GCN-NEXT: S_ENDPGM implicit %2 +# GCN-NEXT: S_ENDPGM 0, implicit %2 name: constant_fold_lshl_or_reg0_immreg_reg alignment: 0 @@ -851,7 +851,7 @@ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec %2:vgpr_32 = V_LSHL_OR_B32 %0,%1, $vgpr0, implicit $exec - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... @@ -859,7 +859,7 @@ # GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_imm{{$}} # GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec -# GCN-NEXT: S_ENDPGM implicit %2 +# GCN-NEXT: S_ENDPGM 0, implicit %2 name: constant_fold_lshl_or_reg0_immreg_imm alignment: 0 @@ -875,7 +875,7 @@ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec %2:vgpr_32 = V_LSHL_OR_B32 %0, %1, 10, implicit $exec - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... 
@@ -883,7 +883,7 @@ # GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}} # GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec, implicit $exec -# GCN-NEXT: S_ENDPGM implicit %3 +# GCN-NEXT: S_ENDPGM 0, implicit %3 name: constant_fold_lshl_or_reg0_immreg_immreg alignment: 0 @@ -900,6 +900,6 @@ %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec %2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec %3:vgpr_32 = V_LSHL_OR_B32 %0, %1, %2, implicit $exec - S_ENDPGM implicit %3 + S_ENDPGM 0, implicit %3 ... Index: test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir =================================================================== --- test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir +++ test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir @@ -311,9 +311,9 @@ %44:vgpr_32 = V_MAD_F32 0, killed %43, 0, 0, 0, 0, 0, 0, implicit $exec %45:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, killed %44, 0, undef %46:vgpr_32, 0, 0, implicit $exec EXP_DONE 0, killed %45, undef %47:vgpr_32, undef %48:vgpr_32, undef %49:vgpr_32, -1, -1, 15, implicit $exec - S_ENDPGM + S_ENDPGM 0 bb.6.DummyReturnBlock: - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/endpgm-dce.mir =================================================================== --- test/CodeGen/AMDGPU/endpgm-dce.mir +++ test/CodeGen/AMDGPU/endpgm-dce.mir @@ -2,7 +2,7 @@ # GCN-LABEL: name: kill_all # GCN: bb.0: -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: kill_all tracksRegLiveness: true registers: @@ -20,13 +20,13 @@ %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec %4 = S_ADD_U32 %3, 1, implicit-def $scc - S_ENDPGM + S_ENDPGM 0 ... 
--- # GCN-LABEL: name: load_without_memoperand # GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc # GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: load_without_memoperand tracksRegLiveness: true registers: @@ -44,13 +44,13 @@ %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec %4 = S_ADD_U32 %3, 1, implicit-def $scc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: load_volatile # GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc # GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4) -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: load_volatile tracksRegLiveness: true registers: @@ -68,13 +68,13 @@ %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4) %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec %4 = S_ADD_U32 %3, 1, implicit-def $scc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: store # GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc # GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: store tracksRegLiveness: true registers: @@ -87,13 +87,13 @@ %1 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc FLAT_STORE_DWORD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: barrier # GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc # GCN-NEXT: S_BARRIER -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: barrier tracksRegLiveness: true body: | @@ -101,13 +101,13 @@ $vcc = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc S_BARRIER - S_ENDPGM + S_ENDPGM 0 ... 
--- # GCN-LABEL: name: call # GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc # GCN-NEXT: $sgpr4_sgpr5 = S_SWAPPC_B64 $sgpr2_sgpr3 -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: call tracksRegLiveness: true body: | @@ -115,13 +115,13 @@ $vcc = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc $sgpr4_sgpr5 = S_SWAPPC_B64 $sgpr2_sgpr3 - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: exp # GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc # GCN-NEXT: EXP 32, undef %0:vgpr_32, undef %1:vgpr_32, %2, undef %3:vgpr_32, 0, 0, 15, implicit $exec -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: exp tracksRegLiveness: true registers: @@ -135,7 +135,7 @@ %2 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc EXP 32, undef %0, undef %1, killed %2, undef %3, 0, 0, 15, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: return_to_epilog @@ -156,7 +156,7 @@ # GCN-NEXT: successors: %bb.1 # GCN-NOT: S_OR_B64 # GCN: bb.1: -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: split_block tracksRegLiveness: true registers: @@ -174,7 +174,7 @@ %2 = IMPLICIT_DEF %1 = V_ADD_F32_e64 0, killed %0, 0, 1, 0, 0, implicit $exec %3 = S_ADD_U32 %2, 1, implicit-def $scc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: split_block_empty_block @@ -183,7 +183,7 @@ # GCN-NOT: S_OR_B64 # GCN: bb.1: # GCN: bb.2: -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: split_block_empty_block tracksRegLiveness: true body: | @@ -194,7 +194,7 @@ bb.1: bb.2: - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: split_block_uncond_branch @@ -203,7 +203,7 @@ # GCN: S_BRANCH %bb.1 # GCN-NOT: S_OR_B64 # GCN: bb.1: -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: split_block_uncond_branch tracksRegLiveness: true body: | @@ -213,7 +213,7 @@ S_BRANCH %bb.1 bb.1: - S_ENDPGM + S_ENDPGM 0 ... 
--- # GCN-LABEL: name: split_block_cond_branch @@ -223,7 +223,7 @@ # GCN: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc # GCN: bb.1: # GCN: bb.2: -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: split_block_cond_branch tracksRegLiveness: true body: | @@ -235,7 +235,7 @@ bb.1: bb.2: - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: two_preds_both_dead @@ -248,7 +248,7 @@ # GCN-NOT: S_AND # GCN: S_BRANCH %bb.2 # GCN: bb.2: -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: two_preds_both_dead tracksRegLiveness: true body: | @@ -263,7 +263,7 @@ S_BRANCH %bb.2 bb.2: - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: two_preds_one_dead @@ -277,7 +277,7 @@ # GCN-NOT: S_AND # GCN: S_BRANCH %bb.2 # GCN: bb.2: -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: two_preds_one_dead tracksRegLiveness: true body: | @@ -293,14 +293,14 @@ S_BRANCH %bb.2 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: implicit_use_on_s_endpgm # GCN: V_ADD_I32 # GCN: COPY # GCN: V_ADDC_U32 -# GCN: S_ENDPGM implicit %3 +# GCN: S_ENDPGM 0, implicit %3 name: implicit_use_on_s_endpgm tracksRegLiveness: true @@ -309,6 +309,6 @@ dead %0:vgpr_32 = V_ADD_I32_e32 12345, undef %1:vgpr_32, implicit-def $vcc, implicit $exec %2:sreg_64_xexec = COPY $vcc %3:vgpr_32, dead %4:sreg_64_xexec = V_ADDC_U32_e64 undef %5:vgpr_32, undef %6:vgpr_32, %2, implicit $exec - S_ENDPGM implicit %3 + S_ENDPGM 0, implicit %3 ... Index: test/CodeGen/AMDGPU/flat-load-clustering.mir =================================================================== --- test/CodeGen/AMDGPU/flat-load-clustering.mir +++ test/CodeGen/AMDGPU/flat-load-clustering.mir @@ -72,6 +72,6 @@ %10.sub1 = V_ADDC_U32_e32 %9.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec FLAT_STORE_DWORD %9, %5, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep2) FLAT_STORE_DWORD %10, %6, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep4) - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/fold-imm-f16-f32.mir =================================================================== --- test/CodeGen/AMDGPU/fold-imm-f16-f32.mir +++ test/CodeGen/AMDGPU/fold-imm-f16-f32.mir @@ -162,7 +162,7 @@ %12 = V_MOV_B32_e32 1065353216, implicit $exec %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $exec BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -229,7 +229,7 @@ %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -297,7 +297,7 @@ %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -370,7 +370,7 @@ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -434,7 +434,7 @@ %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -504,7 +504,7 @@ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -571,7 +571,7 @@ %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $exec BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -638,7 +638,7 @@ %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -704,6 +704,6 @@ %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir =================================================================== --- test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir +++ test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir @@ -15,14 +15,14 @@ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc - ; GCN: S_ENDPGM implicit [[COPY]] + ; GCN: S_ENDPGM 0, implicit [[COPY]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = IMPLICIT_DEF %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec - S_ENDPGM implicit %5 + S_ENDPGM 0, implicit %5 ... --- @@ -38,7 +38,7 @@ ; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec ; GCN: [[V_ADD_I32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF1]], implicit $exec - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_1]], implicit [[V_ADD_I32_e64_2]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_1]], implicit [[V_ADD_I32_e64_2]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = IMPLICIT_DEF @@ -47,7 +47,7 @@ %5:vgpr_32, %6:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec %7:vgpr_32, %8:sreg_64_xexec = V_ADD_I32_e64 %0, %2, implicit $exec - S_ENDPGM implicit %6, implicit %7 + S_ENDPGM 0, implicit %6, implicit %7 ... 
--- @@ -64,7 +64,7 @@ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec ; GCN: DBG_VALUE %5:sreg_64_xexec, $noreg - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = IMPLICIT_DEF @@ -72,7 +72,7 @@ %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec DBG_VALUE %5, $noreg - S_ENDPGM implicit %4 + S_ENDPGM 0, implicit %4 ... @@ -93,7 +93,7 @@ ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY killed $vcc ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[COPY]], implicit $exec - ; GCN: S_ENDPGM implicit [[V_ADDC_U32_e64_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = IMPLICIT_DEF @@ -101,6 +101,6 @@ %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec %6:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %2, %3, %5, implicit $exec - S_ENDPGM implicit %6 + S_ENDPGM 0, implicit %6 ... 
Index: test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir =================================================================== --- test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir +++ test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir @@ -12,11 +12,11 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... @@ -31,11 +31,11 @@ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32_xm0 = S_MOV_B32 12345 %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... --- @@ -49,11 +49,11 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... 
--- @@ -71,11 +71,11 @@ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[DEF]], [[V_MOV_B32_e32_]], implicit $exec - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] %0:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec %1:sreg_32_xm0 = IMPLICIT_DEF %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... @@ -90,11 +90,11 @@ ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[DEF]], implicit $exec - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]] %0:sreg_32_xm0 = IMPLICIT_DEF %1:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... @@ -110,12 +110,12 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc $vcc = S_MOV_B64 -1 %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec - S_ENDPGM implicit %2, implicit $vcc + S_ENDPGM 0, implicit %2, implicit $vcc ... 
@@ -134,7 +134,7 @@ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec ; GCN: bb.1: ; GCN: liveins: $vcc - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc bb.0: successors: %bb.1 $vcc = S_MOV_B64 -1 @@ -144,7 +144,7 @@ bb.1: liveins: $vcc - S_ENDPGM implicit %2, implicit $vcc + S_ENDPGM 0, implicit %2, implicit $vcc ... --- @@ -161,7 +161,7 @@ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec ; GCN: bb.1: ; GCN: liveins: $vcc_lo - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_lo + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc_lo bb.0: successors: %bb.1 $vcc = S_MOV_B64 -1 @@ -171,7 +171,7 @@ bb.1: liveins: $vcc_lo - S_ENDPGM implicit %2, implicit $vcc_lo + S_ENDPGM 0, implicit %2, implicit $vcc_lo ... --- @@ -191,7 +191,7 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_lo + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc_lo bb.0: successors: %bb.1 $vcc = S_MOV_B64 -1 @@ -201,7 +201,7 @@ %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec - S_ENDPGM implicit %2, implicit $vcc_lo + S_ENDPGM 0, implicit %2, implicit $vcc_lo ... 
--- @@ -222,7 +222,7 @@ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec ; GCN: bb.2: ; GCN: liveins: $vcc_hi - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_hi + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]], implicit $vcc_hi bb.0: successors: %bb.1 $vcc_hi = S_MOV_B32 -1 @@ -236,7 +236,7 @@ bb.2: liveins: $vcc_hi - S_ENDPGM implicit %2, implicit $vcc_hi + S_ENDPGM 0, implicit %2, implicit $vcc_hi ... @@ -251,11 +251,11 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_SUBREV_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... @@ -270,11 +270,11 @@ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_SUB_I32_e32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32_xm0 = S_MOV_B32 12345 %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... 
@@ -289,11 +289,11 @@ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_SUB_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... @@ -308,11 +308,11 @@ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_SUBREV_I32_e32_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32_xm0 = S_MOV_B32 12345 %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... @@ -331,7 +331,7 @@ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec ; GCN: bb.1: - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] bb.0: successors: %bb.1 @@ -372,7 +372,7 @@ S_NOP 0 bb.1: - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... @@ -392,7 +392,7 @@ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec ; GCN: bb.1: - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] bb.0: successors: %bb.1 @@ -404,7 +404,7 @@ S_NOP 0 bb.1: - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... 
--- @@ -449,7 +449,7 @@ ; GCN: DBG_VALUE $noreg, 0 ; GCN: DBG_VALUE $noreg, 0 ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF DBG_VALUE $noreg, 0 @@ -481,7 +481,7 @@ DBG_VALUE $noreg, 0 DBG_VALUE $noreg, 0 %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... --- @@ -526,7 +526,7 @@ ; GCN: DBG_VALUE $noreg, 0 ; GCN: DBG_VALUE $noreg, 0 ; GCN: DBG_VALUE $noreg, 0 - ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]] %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF S_NOP 0 @@ -587,6 +587,6 @@ DBG_VALUE $noreg, 0 DBG_VALUE $noreg, 0 $vcc = S_MOV_B64 0 - S_ENDPGM implicit %2 + S_ENDPGM 0, implicit %2 ... Index: test/CodeGen/AMDGPU/fold-immediate-output-mods.mir =================================================================== --- test/CodeGen/AMDGPU/fold-immediate-output-mods.mir +++ test/CodeGen/AMDGPU/fold-immediate-output-mods.mir @@ -67,7 +67,7 @@ %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec %26 = COPY %29 BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -138,7 +138,7 @@ %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit $exec %26 = COPY %29 BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -209,7 +209,7 @@ %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec %26 = COPY %29 BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -280,6 +280,6 @@ %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit $exec %26 = COPY %29 BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/fold-implicit-operand.mir =================================================================== --- test/CodeGen/AMDGPU/fold-implicit-operand.mir +++ test/CodeGen/AMDGPU/fold-implicit-operand.mir @@ -4,11 +4,11 @@ # implicit use # CHECK: %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec -# CHECK-NEXT: S_ENDPGM implicit %0 +# CHECK-NEXT: S_ENDPGM 0, implicit %0 name: fold_imm_implicit_operand body: | bb.0: %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - S_ENDPGM implicit %0 + S_ENDPGM 0, implicit %0 ... Index: test/CodeGen/AMDGPU/fold-multiple.mir =================================================================== --- test/CodeGen/AMDGPU/fold-multiple.mir +++ test/CodeGen/AMDGPU/fold-multiple.mir @@ -35,6 +35,6 @@ %4 = V_AND_B32_e64 killed %2, killed %3, implicit $exec %5 = IMPLICIT_DEF BUFFER_STORE_DWORD_OFFSET killed %4, killed %5, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/global-load-store-atomics.mir =================================================================== --- test/CodeGen/AMDGPU/global-load-store-atomics.mir +++ test/CodeGen/AMDGPU/global-load-store-atomics.mir @@ -245,5 +245,5 @@ GLOBAL_STORE_DWORDX2 %11, %78, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_CMPSWAP_X2 %11, %80, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir =================================================================== --- test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir +++ test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir @@ -14,6 +14,6 @@ BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, implicit $exec $vgpr7 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $m0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/hazard-inlineasm.mir =================================================================== --- test/CodeGen/AMDGPU/hazard-inlineasm.mir +++ test/CodeGen/AMDGPU/hazard-inlineasm.mir @@ -18,7 +18,7 @@ bb.0: FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, implicit $exec, implicit $flat_scr INLINEASM &"v_mad_u64_u32 $0, $1, $2, $3, $4", 0, 2621450, def $vgpr26_vgpr27, 2818058, def dead $sgpr14_sgpr15, 589833, $sgpr12, 327689, killed $vgpr51, 2621449, $vgpr46_vgpr47 - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/hazard-kill.mir =================================================================== --- test/CodeGen/AMDGPU/hazard-kill.mir +++ test/CodeGen/AMDGPU/hazard-kill.mir @@ -27,6 +27,6 @@ renamable $vgpr0 = V_INTERP_MOV_F32 2, 0, 0, implicit $m0, implicit $exec renamable $sgpr0 = S_MOV_B32 0 - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/hazard.mir =================================================================== --- test/CodeGen/AMDGPU/hazard.mir +++ test/CodeGen/AMDGPU/hazard.mir @@ -81,7 +81,7 @@ $vgpr5 = IMPLICIT_DEF $vgpr6 = IMPLICIT_DEF S_SENDMSG 3, implicit $exec, implicit $m0 - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: hazard-lookahead-dbg-value @@ -102,7 +102,7 @@ DBG_VALUE 5 DBG_VALUE 6 S_SENDMSG 3, implicit $exec, implicit $m0 - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: hazard-lookahead-dbg-label @@ -123,5 +123,5 @@ DBG_LABEL 5 DBG_LABEL 6 S_SENDMSG 3, implicit $exec, implicit $m0 - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/insert-skip-from-vcc.mir =================================================================== --- test/CodeGen/AMDGPU/insert-skip-from-vcc.mir +++ test/CodeGen/AMDGPU/insert-skip-from-vcc.mir @@ -17,7 +17,7 @@ $sgpr0_sgpr1 = S_MOV_B64 -1 $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_execz_imm_vccz @@ -34,7 +34,7 @@ bb.2: $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_execnz_imm_vccnz @@ -51,7 +51,7 @@ bb.2: $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_execz_imm_vccz_live_scc @@ -68,7 +68,7 @@ bb.2: $vcc = S_AND_B64 $exec, -1, implicit-def $scc S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_execz_mov_vccz_live_scc @@ -87,7 +87,7 @@ $sgpr0_sgpr1 = S_MOV_B64 -1 $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_execz_mov_vccz_live_sreg @@ -105,7 +105,7 @@ $sgpr0_sgpr1 = S_MOV_B64 -1 $vcc = S_AND_B64 $exec, $sgpr0_sgpr1, implicit-def dead $scc S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_execz_mov_vccz_live_sreg_commute @@ -123,7 +123,7 @@ $sgpr0_sgpr1 = S_MOV_B64 -1 $vcc = S_AND_B64 $sgpr0_sgpr1, $exec, implicit-def dead $scc S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_execz_mov_vccz_live_scc_commute @@ -142,7 +142,7 @@ $sgpr0_sgpr1 = S_MOV_B64 -1 $vcc = S_AND_B64 killed $sgpr0_sgpr1, $exec, implicit-def $scc S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... 
--- # GCN-LABEL: name: and_execz_mov_vccz_commute @@ -161,12 +161,12 @@ $sgpr0_sgpr1 = S_MOV_B64 -1 $vcc = S_AND_B64 killed $sgpr0_sgpr1, $exec, implicit-def dead $scc S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_execz_mov_exec_vccz # GCN: $exec = S_MOV_B64 -1 -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: and_execz_mov_exec_vccz body: | bb.0: @@ -179,7 +179,7 @@ $exec = S_MOV_B64 -1 $vcc = S_AND_B64 $exec, $exec, implicit-def dead $scc S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_execz_mov_exec_vccnz @@ -197,7 +197,7 @@ $exec = S_MOV_B64 -1 $vcc = S_AND_B64 $exec, $exec, implicit-def dead $scc S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_execz_mov_vccz_reads_sreg_early @@ -217,7 +217,7 @@ $sgpr2 = S_MOV_B32 $sgpr1 $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_execz_mov_vccz_reads_sreg_late @@ -237,7 +237,7 @@ $vcc = S_AND_B64 $exec, $sgpr0_sgpr1, implicit-def dead $scc $sgpr2 = S_MOV_B32 $sgpr1 S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: and_execz_mov_vccz_reads_writes_sreg_early # GCN: $sgpr0_sgpr1 = S_MOV_B64 -1 @@ -257,7 +257,7 @@ $sgpr1 = S_MOV_B32 $sgpr0 $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_execz_mov_vccz_reads_cond @@ -277,7 +277,7 @@ $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc $sgpr2 = S_MOV_B32 $vcc_lo S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... 
--- # GCN-LABEL: name: and_execz_mov_vccz_modifies_sreg @@ -298,13 +298,13 @@ $sgpr0 = S_MOV_B32 0 $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_execz_imm_vccz_liveout_scc # GCN: $vcc = S_AND_B64 $exec, -1, implicit-def $scc # GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec -# GCN-NEXT S_ENDPGM implicit $scc +# GCN-NEXT S_ENDPGM 0, implicit $scc name: and_execz_imm_vccz_liveout_scc body: | bb.0: @@ -316,5 +316,5 @@ bb.2: $vcc = S_AND_B64 $exec, -1, implicit-def $scc S_CBRANCH_VCCZ %bb.1, implicit killed $vcc - S_ENDPGM implicit $scc + S_ENDPGM 0, implicit $scc ... Index: test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir =================================================================== --- test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir +++ test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir @@ -18,10 +18,10 @@ # CHECK: bb.3: # CHECK-NEXT: EXP_DONE -# CHECK: S_ENDPGM +# CHECK: S_ENDPGM 0 # CHECK: bb.2: -# CHECK: S_ENDPGM +# CHECK: S_ENDPGM 0 name: kill_uncond_branch @@ -37,4 +37,4 @@ S_BRANCH %bb.2 bb.2: - S_ENDPGM + S_ENDPGM 0 Index: test/CodeGen/AMDGPU/inserted-wait-states.mir =================================================================== --- test/CodeGen/AMDGPU/inserted-wait-states.mir +++ test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -95,7 +95,7 @@ bb.3: $vgpr4, $vcc = V_DIV_SCALE_F32 $vgpr1, $vgpr1, $vgpr3, implicit $exec $vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... @@ -146,7 +146,7 @@ bb.3: S_SETREG_B32 $sgpr0, 0 $sgpr1 = S_GETREG_B32 1 - S_ENDPGM + S_ENDPGM 0 ... ... @@ -185,7 +185,7 @@ bb.2: S_SETREG_B32 $sgpr0, 1 S_SETREG_B32 $sgpr1, 0 - S_ENDPGM + S_ENDPGM 0 ... ... 
@@ -257,7 +257,7 @@ $vgpr3 = V_MOV_B32_e32 0, implicit $exec FLAT_ATOMIC_FCMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr $vgpr3 = V_MOV_B32_e32 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... @@ -319,7 +319,7 @@ bb.3: $vgpr0,implicit $vcc = V_ADD_I32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec $vgpr4 = V_WRITELANE_B32 $sgpr4, $vcc_lo, $vgpr4 - S_ENDPGM + S_ENDPGM 0 ... @@ -348,7 +348,7 @@ bb.1: S_SETREG_B32 $sgpr0, 0 S_RFE_B64 $sgpr2_sgpr3 - S_ENDPGM + S_ENDPGM 0 ... @@ -377,7 +377,7 @@ bb.1: $sgpr0 = S_MOV_FED_B32 $sgpr0 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... @@ -427,7 +427,7 @@ bb.3: $m0 = S_MOV_B32 0 $sgpr0_sgpr1 = S_MOVRELD_B64 $sgpr0_sgpr1, implicit $m0 - S_ENDPGM + S_ENDPGM 0 ... ... @@ -476,7 +476,7 @@ bb.3: $m0 = S_MOV_B32 0 $vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $m0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... ... @@ -510,7 +510,7 @@ bb.1: implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec $vgpr3 = V_MOV_B32_dpp $vgpr3, $vgpr0, 0, 15, 15, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- name: mov_fed_hazard_crash_on_dbg_value @@ -557,6 +557,6 @@ $sgpr8 = S_MOV_B32 $sgpr4, implicit killed $sgpr4_sgpr5 $vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr) - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/invert-br-undef-vcc.mir =================================================================== --- test/CodeGen/AMDGPU/invert-br-undef-vcc.mir +++ test/CodeGen/AMDGPU/invert-br-undef-vcc.mir @@ -81,6 +81,6 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir =================================================================== --- test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir +++ test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir @@ -110,7 +110,7 @@ liveins: $sgpr2_sgpr3 $exec = S_OR_B64 $exec, killed $sgpr2_sgpr3, implicit-def $scc - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir =================================================================== --- test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir +++ test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir @@ -15,7 +15,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -31,7 +31,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(42)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -48,7 +48,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup") seq_cst seq_cst 4 on `i32 addrspace(42)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -64,6 +64,6 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront") seq_cst 4 on `i32 addrspace(42)* undef`) - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/memory-legalizer-local.mir =================================================================== --- test/CodeGen/AMDGPU/memory-legalizer-local.mir +++ test/CodeGen/AMDGPU/memory-legalizer-local.mir @@ -21,7 +21,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -45,7 +45,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -69,7 +69,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -93,7 +93,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -117,7 +117,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -141,7 +141,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -165,7 +165,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -189,7 +189,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -213,7 +213,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -237,7 +237,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -261,7 +261,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -285,7 +285,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -309,7 +309,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -333,7 +333,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -357,7 +357,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -381,7 +381,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -405,7 +405,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -429,7 +429,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -453,7 +453,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -477,7 +477,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -488,7 +488,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_singlethread_unordered body: | @@ -499,7 +499,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -510,7 +510,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_singlethread_monotonic body: | @@ -521,7 +521,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -532,7 +532,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_singlethread_release body: | @@ -543,7 +543,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -554,7 +554,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_singlethread_seq_cst body: | @@ -565,7 +565,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -576,7 +576,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_wavefront_unordered body: | @@ -587,7 +587,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") unordered 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -598,7 +598,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_wavefront_monotonic body: | @@ -609,7 +609,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") monotonic 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -620,7 +620,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_wavefront_release body: | @@ -631,7 +631,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") release 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -642,7 +642,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_wavefront_seq_cst body: | @@ -653,7 +653,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") seq_cst 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -664,7 +664,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_workgroup_unordered body: | @@ -675,7 +675,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") unordered 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -686,7 +686,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_workgroup_monotonic body: | @@ -697,7 +697,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") monotonic 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -708,7 +708,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_workgroup_release body: | @@ -719,7 +719,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") release 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -730,7 +730,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_workgroup_seq_cst body: | @@ -741,7 +741,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") seq_cst 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -752,7 +752,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_agent_unordered body: | @@ -763,7 +763,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") unordered 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -774,7 +774,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_agent_monotonic body: | @@ -785,7 +785,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") monotonic 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -796,7 +796,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_agent_release body: | @@ -807,7 +807,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") release 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -818,7 +818,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_agent_seq_cst body: | @@ -829,7 +829,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -840,7 +840,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_system_unordered body: | @@ -851,7 +851,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store unordered 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -862,7 +862,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_system_monotonic body: | @@ -873,7 +873,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store monotonic 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -884,7 +884,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_system_release body: | @@ -895,7 +895,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store release 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -906,7 +906,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_system_seq_cst body: | @@ -917,7 +917,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store seq_cst 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -928,7 +928,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRXCHG_RTN_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: atomicrmw_singlethread_unordered body: | @@ -939,7 +939,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -950,7 +950,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRXCHG_RTN_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: atomicrmw_singlethread_monotonic body: | @@ -961,7 +961,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -972,7 +972,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRXCHG_RTN_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: atomicrmw_singlethread_acquire body: | @@ -983,7 +983,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acquire 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -994,7 +994,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRXCHG_RTN_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: atomicrmw_singlethread_release body: | @@ -1005,7 +1005,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -1016,7 +1016,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRXCHG_RTN_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: atomicrmw_singlethread_acq_rel body: | @@ -1027,7 +1027,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acq_rel 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -1038,7 +1038,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRXCHG_RTN_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: atomicrmw_singlethread_seq_cst body: | @@ -1049,6 +1049,6 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(3)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir =================================================================== --- test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir +++ test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir @@ -60,6 +60,6 @@ $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec S_WAITCNT 3952 FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32 addrspace(1)* undef`) - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir =================================================================== --- test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir +++ test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir @@ -154,6 +154,6 @@ $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec S_WAITCNT 3952 FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out) - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir =================================================================== --- test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir +++ test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir @@ -134,6 +134,6 @@ $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec S_WAITCNT 3952 FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out) - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/memory-legalizer-region.mir =================================================================== --- test/CodeGen/AMDGPU/memory-legalizer-region.mir +++ test/CodeGen/AMDGPU/memory-legalizer-region.mir @@ -21,7 +21,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -45,7 +45,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -69,7 +69,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -93,7 +93,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -117,7 +117,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -141,7 +141,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -165,7 +165,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -189,7 +189,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -213,7 +213,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -237,7 +237,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -261,7 +261,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -285,7 +285,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -309,7 +309,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -333,7 +333,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -357,7 +357,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -381,7 +381,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -405,7 +405,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -429,7 +429,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -453,7 +453,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -477,7 +477,7 @@ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -488,7 +488,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_singlethread_unordered body: | @@ -499,7 +499,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -510,7 +510,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_singlethread_monotonic body: | @@ -521,7 +521,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -532,7 +532,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_singlethread_release body: | @@ -543,7 +543,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -554,7 +554,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_singlethread_seq_cst body: | @@ -565,7 +565,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -576,7 +576,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_wavefront_unordered body: | @@ -587,7 +587,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") unordered 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -598,7 +598,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_wavefront_monotonic body: | @@ -609,7 +609,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") monotonic 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -620,7 +620,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_wavefront_release body: | @@ -631,7 +631,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") release 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -642,7 +642,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_wavefront_seq_cst body: | @@ -653,7 +653,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") seq_cst 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -664,7 +664,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_workgroup_unordered body: | @@ -675,7 +675,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") unordered 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -686,7 +686,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_workgroup_monotonic body: | @@ -697,7 +697,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") monotonic 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -708,7 +708,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_workgroup_release body: | @@ -719,7 +719,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") release 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -730,7 +730,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_workgroup_seq_cst body: | @@ -741,7 +741,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") seq_cst 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -752,7 +752,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_agent_unordered body: | @@ -763,8 +763,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") unordered 4 into `i32 addrspace(2)* undef`) - S_ENDPGM - + S_ENDPGM 0 ... --- @@ -774,7 +773,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_agent_monotonic body: | @@ -785,7 +784,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") monotonic 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -796,7 +795,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_agent_release body: | @@ -807,7 +806,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") release 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -818,7 +817,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_agent_seq_cst body: | @@ -829,7 +828,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -840,7 +839,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_system_unordered body: | @@ -851,7 +850,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store unordered 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -862,7 +861,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_system_monotonic body: | @@ -873,7 +872,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store monotonic 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -884,7 +883,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_system_release body: | @@ -895,7 +894,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store release 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -906,7 +905,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRITE_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: store_system_seq_cst body: | @@ -917,7 +916,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store seq_cst 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -928,7 +927,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRXCHG_RTN_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: atomicrmw_singlethread_unordered body: | @@ -939,7 +938,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -950,7 +949,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRXCHG_RTN_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: atomicrmw_singlethread_monotonic body: | @@ -961,7 +960,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -972,7 +971,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRXCHG_RTN_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: atomicrmw_singlethread_acquire body: | @@ -983,7 +982,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acquire 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -994,7 +993,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRXCHG_RTN_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: atomicrmw_singlethread_release body: | @@ -1005,7 +1004,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -1016,7 +1015,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRXCHG_RTN_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: atomicrmw_singlethread_acq_rel body: | @@ -1027,7 +1026,7 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acq_rel 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -1038,7 +1037,7 @@ # GCN-NOT: S_WAITCNT # GCN: DS_WRXCHG_RTN_B32 # GCN-NOT: S_WAITCNT -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: atomicrmw_singlethread_seq_cst body: | @@ -1049,6 +1048,6 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(2)* undef`) - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/memory_clause.mir =================================================================== --- test/CodeGen/AMDGPU/memory_clause.mir +++ test/CodeGen/AMDGPU/memory_clause.mir @@ -366,7 +366,7 @@ # GCN-NEXT: dead %3:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 0, implicit $exec, implicit $flat_scr # GCN-NEXT: FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr # GCN-NEXT: FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 --- name: atomic @@ -384,5 +384,5 @@ %3:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 0, implicit $exec, implicit $flat_scr FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/merge-load-store-physreg.mir =================================================================== --- test/CodeGen/AMDGPU/merge-load-store-physreg.mir +++ test/CodeGen/AMDGPU/merge-load-store-physreg.mir @@ -62,7 +62,7 @@ %12:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.64) - S_ENDPGM + S_ENDPGM 0 ... @@ -111,6 +111,6 @@ %11:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.64) - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/merge-load-store-vreg.mir =================================================================== --- test/CodeGen/AMDGPU/merge-load-store-vreg.mir +++ test/CodeGen/AMDGPU/merge-load-store-vreg.mir @@ -73,7 +73,7 @@ S_BRANCH %bb.2 bb.1: - S_ENDPGM + S_ENDPGM 0 bb.2: %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit $exec @@ -108,7 +108,7 @@ S_BRANCH %bb.2 bb.1: - S_ENDPGM + S_ENDPGM 0 bb.2: %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec @@ -139,7 +139,7 @@ S_BRANCH %bb.2 bb.1: - S_ENDPGM + S_ENDPGM 0 bb.2: %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec Index: test/CodeGen/AMDGPU/merge-load-store.mir =================================================================== --- test/CodeGen/AMDGPU/merge-load-store.mir +++ test/CodeGen/AMDGPU/merge-load-store.mir @@ -100,7 +100,7 @@ %4:vgpr_32 = DS_READ_B32 %1, 4, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.4) %5:vgpr_32 = V_ADD_I32_e32 killed %3, killed %4, implicit-def $vcc, implicit $exec DS_WRITE_B32 killed %1, %5, 0, 0, implicit killed $m0, implicit $exec :: (store 4 into %ir.ptr.0) - S_ENDPGM + S_ENDPGM 0 ... 
--- Index: test/CodeGen/AMDGPU/misched-killflags.mir =================================================================== --- test/CodeGen/AMDGPU/misched-killflags.mir +++ test/CodeGen/AMDGPU/misched-killflags.mir @@ -23,7 +23,7 @@ $vgpr2 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11 $vgpr3 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec S_NOP 0, implicit killed $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 - S_ENDPGM + S_ENDPGM 0 ... # CHECK-LABEL: name: func0 # CHECK-DAG: $sgpr10 = S_MOV_B32 5 @@ -42,4 +42,4 @@ # CHECK: $vgpr2 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11 # CHECK: $vgpr3 = V_MOV_B32_e32 killed $sgpr11, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec # CHECK: S_NOP 0, implicit killed $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 -# CHECK: S_ENDPGM +# CHECK: S_ENDPGM 0 Index: test/CodeGen/AMDGPU/mode-register.mir =================================================================== --- test/CodeGen/AMDGPU/mode-register.mir +++ test/CodeGen/AMDGPU/mode-register.mir @@ -23,7 +23,7 @@ $vgpr1 = V_INTERP_P2_F16 0, $vgpr2, 2, 1, 0, killed $vgpr1, 0, 0, implicit $m0, implicit $exec $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $m0, implicit $exec $vgpr0 = V_ADD_F16_e32 killed $vgpr1, killed $vgpr0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # check that the mode is not changed for interp f16 when the mode is already RTZ @@ -49,7 +49,7 @@ $vgpr1 = V_INTERP_P2_F16 0, $vgpr2, 2, 1, 0, killed $vgpr1, 0, 0, implicit $m0, implicit $exec $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $m0, implicit $exec $vgpr0 = V_ADD_F16_e32 killed $vgpr1, killed $vgpr0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... 
--- # check that explicit RTN mode change is registered @@ -75,7 +75,7 @@ $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $m0, implicit $exec S_SETREG_IMM32_B32 0, 2177 $vgpr0 = V_ADD_F16_e32 killed $vgpr1, killed $vgpr0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # check that the mode is unchanged from RTN for F64 instruction @@ -90,7 +90,7 @@ bb.0: liveins: $vgpr1_vgpr2 $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # check that the mode is changed from RTZ to RTN for F64 instruction @@ -108,7 +108,7 @@ liveins: $vgpr1_vgpr2 S_SETREG_IMM32_B32 3, 2177 $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # CHECK-LABEL: name: rtz_from_rtn @@ -127,7 +127,7 @@ bb.1: $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $m0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # check that the mode is changed from RTZ to RTN for F64 instruction @@ -157,7 +157,7 @@ $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $exec $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $m0, implicit $exec $vgpr0 = V_ADD_F16_e32 killed $sgpr0, killed $vgpr0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # check that an explicit change to the single precision mode has no effect @@ -187,7 +187,7 @@ $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $exec $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $m0, implicit $exec $vgpr0 = V_ADD_F16_e32 killed $sgpr0, killed $vgpr0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # check that mode is propagated back to start of loop - first instruction is RTN but needs @@ -223,7 +223,7 @@ S_BRANCH %bb.3 bb.3: - S_ENDPGM + S_ENDPGM 0 ... --- # two back-edges to same node with different modes @@ -272,7 +272,7 @@ S_BRANCH %bb.6 bb.6: - S_ENDPGM + S_ENDPGM 0 ... 
--- # check that mode is propagated back to start of loop and through a block that @@ -311,7 +311,7 @@ S_BRANCH %bb.4 bb.4: - S_ENDPGM + S_ENDPGM 0 ... --- # check that multiple mode values are propagated to a block that uses the mode @@ -346,7 +346,7 @@ S_BRANCH %bb.4 bb.4: - S_ENDPGM + S_ENDPGM 0 ... --- # check that multiple mode values are propagated through a block that neither @@ -387,7 +387,7 @@ S_BRANCH %bb.5 bb.5: - S_ENDPGM + S_ENDPGM 0 ... --- # CHECK-LABEL: name: pass_through_blocks @@ -420,7 +420,7 @@ bb.4: $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $m0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # check that multiple mode values are propagated @@ -455,5 +455,5 @@ S_BRANCH %bb.4 bb.4: - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/movrels-bug.mir =================================================================== --- test/CodeGen/AMDGPU/movrels-bug.mir +++ test/CodeGen/AMDGPU/movrels-bug.mir @@ -26,6 +26,6 @@ $vgpr1 = V_MOVRELS_B32_e32 undef $vgpr1, implicit $m0, implicit $exec, implicit killed $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr4 = V_MAC_F32_e32 undef $vgpr0, undef $vgpr0, undef $vgpr4, implicit $exec EXP_DONE 15, undef $vgpr0, killed $vgpr1, killed $vgpr4, undef $vgpr0, 0, 0, 12, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/omod-nsz-flag.mir =================================================================== --- test/CodeGen/AMDGPU/omod-nsz-flag.mir +++ test/CodeGen/AMDGPU/omod-nsz-flag.mir @@ -21,7 +21,7 @@ # GCN-LABEL: name: omod_inst_flag_nsz_src # GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec # GCN-NEXT: %1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec -# GCN-NEXT: S_ENDPGM implicit %1 +# GCN-NEXT: S_ENDPGM 0, implicit %1 name: omod_inst_flag_nsz_src tracksRegLiveness: true @@ -31,14 +31,14 @@ %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec %1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec - S_ENDPGM implicit %1 + S_ENDPGM 0, implicit %1 ... --- # GCN-LABEL: name: omod_inst_flag_nsz_result # GCN: %0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec -# GCN-NEXT: S_ENDPGM implicit %0 +# GCN-NEXT: S_ENDPGM 0, implicit %0 name: omod_inst_flag_nsz_result tracksRegLiveness: true @@ -49,14 +49,14 @@ %0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec %1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec - S_ENDPGM implicit %1 + S_ENDPGM 0, implicit %1 ... --- # GCN-LABEL: name: omod_inst_flag_nsz_both # GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec -# GCN-NEXT: S_ENDPGM implicit %0 +# GCN-NEXT: S_ENDPGM 0, implicit %0 name: omod_inst_flag_nsz_both tracksRegLiveness: true @@ -67,5 +67,5 @@ %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec %1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec - S_ENDPGM implicit %1 + S_ENDPGM 0, implicit %1 ... 
Index: test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir =================================================================== --- test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir +++ test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir @@ -141,7 +141,7 @@ bb.2.bb2: SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -247,7 +247,7 @@ bb.2.bb2: SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -336,6 +336,6 @@ bb.2.bb2: SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/optimize-if-exec-masking.mir =================================================================== --- test/CodeGen/AMDGPU/optimize-if-exec-masking.mir +++ test/CodeGen/AMDGPU/optimize-if-exec-masking.mir @@ -160,7 +160,7 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -197,7 +197,7 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -234,7 +234,7 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -275,7 +275,7 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -313,7 +313,7 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -357,7 +357,7 @@ $sgpr2 = S_MOV_B32 -1 $sgpr3 = S_MOV_B32 61440 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -396,7 +396,7 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... @@ -435,7 +435,7 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -472,7 +472,7 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -509,7 +509,7 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -548,5 +548,5 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir =================================================================== --- test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir +++ test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir @@ -19,7 +19,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop3 @@ -41,7 +41,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop2_redef_vcc1 @@ -67,7 +67,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop2_redef_vcc2 @@ -93,7 +93,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop3_redef_cmp @@ -119,7 +119,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... 
# GCN: name: negated_cond_undef_vcc @@ -137,7 +137,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop3_imp_vcc @@ -159,7 +159,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop2_imp_vcc @@ -181,7 +181,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop3_redef_sel @@ -207,7 +207,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop2_used_sel @@ -231,7 +231,7 @@ bb.2: $vgpr0 = COPY %1 - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop2_used_vcc @@ -257,7 +257,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop3_sel_wrong_subreg1 @@ -283,7 +283,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop3_sel_wrong_subreg2 @@ -309,7 +309,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop3_sel_right_subreg1 @@ -333,7 +333,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop3_sel_right_subreg2 @@ -357,7 +357,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop3_sel_subreg_overlap @@ -383,7 +383,7 @@ S_BRANCH %bb.0 bb.2: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop2_dominated_blocks @@ -407,7 +407,7 @@ S_BRANCH %bb.1 bb.3: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop2_different_blocks_cmp_and @@ -431,7 +431,7 @@ S_BRANCH %bb.1 bb.3: - S_ENDPGM + S_ENDPGM 0 ... # GCN: name: negated_cond_vop2_not_dominated_blocks @@ -461,5 +461,5 @@ S_BRANCH %bb.2 bb.4: - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/reduce-saveexec.mir =================================================================== --- test/CodeGen/AMDGPU/reduce-saveexec.mir +++ test/CodeGen/AMDGPU/reduce-saveexec.mir @@ -3,7 +3,7 @@ --- # GCN-LABEL: name: reduce_and_saveexec # GCN: $exec = S_AND_B64 $exec, killed $vcc -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: reduce_and_saveexec tracksRegLiveness: true body: | @@ -11,12 +11,12 @@ $vcc = IMPLICIT_DEF $sgpr0_sgpr1 = S_AND_B64 $exec, killed $vcc, implicit-def $scc $exec = COPY killed $sgpr0_sgpr1 - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: reduce_and_saveexec_commuted # GCN: $exec = S_AND_B64 killed $vcc, $exec -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: reduce_and_saveexec_commuted tracksRegLiveness: true body: | @@ -24,7 +24,7 @@ $vcc = IMPLICIT_DEF $sgpr0_sgpr1 = S_AND_B64 killed $vcc, $exec, implicit-def $scc $exec = COPY killed $sgpr0_sgpr1 - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: reduce_and_saveexec_liveout @@ -37,12 +37,12 @@ $vcc = IMPLICIT_DEF $sgpr0_sgpr1 = S_AND_B64 $exec, killed $vcc, implicit-def $scc $exec = COPY $sgpr0_sgpr1 - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: and_saveexec # GCN: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: and_saveexec tracksRegLiveness: true body: | @@ -51,12 +51,12 @@ $sgpr0_sgpr1 = COPY $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc $exec = S_MOV_B64_term $sgpr2_sgpr3 - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: reduce_or_saveexec # GCN: $exec = S_OR_B64 $exec, killed $vcc -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: reduce_or_saveexec tracksRegLiveness: true body: | @@ -64,12 +64,12 @@ $vcc = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc $exec = COPY killed $sgpr0_sgpr1 - S_ENDPGM + S_ENDPGM 0 ... 
--- # GCN-LABEL: name: reduce_xor_saveexec # GCN: $exec = S_XOR_B64 $exec, killed $vcc -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: reduce_xor_saveexec tracksRegLiveness: true body: | @@ -77,12 +77,12 @@ $vcc = IMPLICIT_DEF $sgpr0_sgpr1 = S_XOR_B64 $exec, killed $vcc, implicit-def $scc $exec = COPY killed $sgpr0_sgpr1 - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: reduce_andn2_saveexec # GCN: $exec = S_ANDN2_B64 $exec, killed $vcc -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: reduce_andn2_saveexec tracksRegLiveness: true body: | @@ -90,12 +90,12 @@ $vcc = IMPLICIT_DEF $sgpr0_sgpr1 = S_ANDN2_B64 $exec, killed $vcc, implicit-def $scc $exec = COPY killed $sgpr0_sgpr1 - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: reduce_orn2_saveexec # GCN: $exec = S_ORN2_B64 $exec, killed $vcc -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: reduce_orn2_saveexec tracksRegLiveness: true body: | @@ -103,12 +103,12 @@ $vcc = IMPLICIT_DEF $sgpr0_sgpr1 = S_ORN2_B64 $exec, killed $vcc, implicit-def $scc $exec = COPY killed $sgpr0_sgpr1 - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: reduce_nand_saveexec # GCN: $exec = S_NAND_B64 $exec, killed $vcc -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: reduce_nand_saveexec tracksRegLiveness: true body: | @@ -116,12 +116,12 @@ $vcc = IMPLICIT_DEF $sgpr0_sgpr1 = S_NAND_B64 $exec, killed $vcc, implicit-def $scc $exec = COPY killed $sgpr0_sgpr1 - S_ENDPGM + S_ENDPGM 0 ... --- # GCN-LABEL: name: reduce_nor_saveexec # GCN: $exec = S_NOR_B64 $exec, killed $vcc -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: reduce_nor_saveexec tracksRegLiveness: true body: | @@ -129,12 +129,12 @@ $vcc = IMPLICIT_DEF $sgpr0_sgpr1 = S_NOR_B64 $exec, killed $vcc, implicit-def $scc $exec = COPY killed $sgpr0_sgpr1 - S_ENDPGM + S_ENDPGM 0 ... 
--- # GCN-LABEL: name: reduce_xnor_saveexec # GCN: $exec = S_XNOR_B64 $exec, killed $vcc -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 name: reduce_xnor_saveexec tracksRegLiveness: true body: | @@ -142,6 +142,6 @@ $vcc = IMPLICIT_DEF $sgpr0_sgpr1 = S_XNOR_B64 $exec, killed $vcc, implicit-def $scc $exec = COPY killed $sgpr0_sgpr1 - S_ENDPGM + S_ENDPGM 0 ... --- Index: test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir =================================================================== --- test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir +++ test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir @@ -234,6 +234,6 @@ bb.34: bb.35: - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/regcoal-subrange-join.mir =================================================================== --- test/CodeGen/AMDGPU/regcoal-subrange-join.mir +++ test/CodeGen/AMDGPU/regcoal-subrange-join.mir @@ -157,6 +157,6 @@ bb.4: EXP 32, undef %53, undef %54, killed %46, undef %55, 0, 0, 15, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/regcoalesce-cannot-join-failures.mir =================================================================== --- test/CodeGen/AMDGPU/regcoalesce-cannot-join-failures.mir +++ test/CodeGen/AMDGPU/regcoalesce-cannot-join-failures.mir @@ -14,7 +14,7 @@ ; CHECK: %0.sub1:sreg_64_xexec = COPY %0.sub0 ; CHECK: S_BRANCH %bb.2 ; CHECK: bb.2: - ; CHECK: S_ENDPGM implicit %0 + ; CHECK: S_ENDPGM 0, implicit %0 bb.0: successors: %bb.1 @@ -29,7 +29,7 @@ bb.2: dead %2:sreg_32_xm0 = COPY %0.sub0:sreg_64_xexec - S_ENDPGM implicit killed %1 + S_ENDPGM 0, implicit killed %1 ... 
--- @@ -40,11 +40,11 @@ ; CHECK-LABEL: name: couldnt_join_subrange_no_implicit_def_inst ; CHECK: undef %0.sub0:sreg_64 = S_MOV_B32 0 ; CHECK: %0.sub1:sreg_64 = COPY %0.sub0 - ; CHECK: S_ENDPGM implicit %0.sub1 + ; CHECK: S_ENDPGM 0, implicit %0.sub1 undef %0.sub0:sreg_64 = S_MOV_B32 0 %1:sreg_64 = COPY %0:sreg_64 %0.sub1:sreg_64 = COPY %0.sub0:sreg_64 - S_ENDPGM implicit %1.sub1:sreg_64 + S_ENDPGM 0, implicit %1.sub1:sreg_64 ... --- @@ -59,7 +59,7 @@ ; CHECK: %0.sub0:sreg_64 = S_MOV_B32 0 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY %0 ; CHECK: dead %0.sub1:sreg_64 = COPY %0.sub0 - ; CHECK: S_ENDPGM implicit [[COPY]].sub1 + ; CHECK: S_ENDPGM 0, implicit [[COPY]].sub1 bb.0: successors: %bb.1 undef %0.sub1:sreg_64 = S_MOV_B32 -1 @@ -68,7 +68,7 @@ %0.sub0:sreg_64 = S_MOV_B32 0 %1:sreg_64 = COPY %0:sreg_64 dead %0.sub1:sreg_64 = COPY %0.sub0:sreg_64 - S_ENDPGM implicit %1.sub1:sreg_64 + S_ENDPGM 0, implicit %1.sub1:sreg_64 ... --- @@ -82,13 +82,13 @@ ; CHECK: %0.sub1:sreg_64_xexec = S_MOV_B32 0 ; CHECK: S_NOP 0, implicit %0.sub1 ; CHECK: S_NOP 0, implicit %0 - ; CHECK: S_ENDPGM + ; CHECK: S_ENDPGM 0 undef %0.sub0:sreg_64_xexec = S_MOV_B32 0 %1:sreg_64 = COPY %0 %0.sub1:sreg_64_xexec = S_MOV_B32 0 S_NOP 0, implicit %0.sub1 S_NOP 0, implicit %1 - S_ENDPGM + S_ENDPGM 0 ... --- @@ -102,7 +102,7 @@ ; CHECK: %0.sub1:sreg_64_xexec = COPY %0.sub0 ; CHECK: bb.1: ; CHECK: S_NOP 0, implicit %0.sub1 - ; CHECK: S_ENDPGM implicit %0 + ; CHECK: S_ENDPGM 0, implicit %0 bb.0: successors: %bb.1 @@ -113,6 +113,6 @@ bb.1: S_NOP 0, implicit %0.sub1 - S_ENDPGM implicit %1 + S_ENDPGM 0, implicit %1 ... 
Index: test/CodeGen/AMDGPU/regcoalesce-dbg.mir =================================================================== --- test/CodeGen/AMDGPU/regcoalesce-dbg.mir +++ test/CodeGen/AMDGPU/regcoalesce-dbg.mir @@ -71,6 +71,6 @@ %20 = V_LSHL_B64 killed %19, 2, implicit $exec %16 = COPY killed %5 BUFFER_STORE_DWORD_ADDR64 killed %16, killed %20, killed %13, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/regcoalesce-keep-valid-lanes-implicit-def-bug39602.mir =================================================================== --- test/CodeGen/AMDGPU/regcoalesce-keep-valid-lanes-implicit-def-bug39602.mir +++ test/CodeGen/AMDGPU/regcoalesce-keep-valid-lanes-implicit-def-bug39602.mir @@ -16,7 +16,7 @@ ; CHECK: %0.sub0:sreg_64 = S_MOV_B32 0 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY %0 ; CHECK: dead %0.sub1:sreg_64 = COPY %0.sub0 - ; CHECK: S_ENDPGM implicit [[COPY]].sub1 + ; CHECK: S_ENDPGM 0, implicit [[COPY]].sub1 bb.0: successors: %bb.1 undef %0.sub1:sreg_64 = IMPLICIT_DEF @@ -25,7 +25,7 @@ %0.sub0:sreg_64 = S_MOV_B32 0 %1:sreg_64 = COPY %0:sreg_64 dead %0.sub1:sreg_64 = COPY %0.sub0:sreg_64 - S_ENDPGM implicit %1.sub1:sreg_64 + S_ENDPGM 0, implicit %1.sub1:sreg_64 ... --- @@ -43,7 +43,7 @@ ; CHECK: %0.sub0:sreg_64 = S_MOV_B32 0 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY %0 ; CHECK: dead %0.sub1:sreg_64 = COPY %0.sub0 - ; CHECK: S_ENDPGM implicit [[COPY]].sub1 + ; CHECK: S_ENDPGM 0, implicit [[COPY]].sub1 bb.0: successors: %bb.1 undef %0.sub1:sreg_64 = S_MOV_B32 -1 @@ -52,6 +52,6 @@ %0.sub0:sreg_64 = S_MOV_B32 0 %1:sreg_64 = COPY %0:sreg_64 dead %0.sub1:sreg_64 = COPY %0.sub0:sreg_64 - S_ENDPGM implicit %1.sub1:sreg_64 + S_ENDPGM 0, implicit %1.sub1:sreg_64 ... 
Index: test/CodeGen/AMDGPU/regcoalesce-prune.mir =================================================================== --- test/CodeGen/AMDGPU/regcoalesce-prune.mir +++ test/CodeGen/AMDGPU/regcoalesce-prune.mir @@ -27,5 +27,5 @@ bb.3: %3 : vgpr_32 = V_CVT_F32_I32_e32 killed %6.sub1, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir =================================================================== --- test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir +++ test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir @@ -191,9 +191,9 @@ S_BRANCH %bb.29 bb.33: - S_ENDPGM + S_ENDPGM 0 bb.34: - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir =================================================================== --- test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir +++ test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir @@ -64,7 +64,7 @@ undef %15.sub0 = COPY killed %16 %15.sub1 = COPY killed %14 FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... --- Index: test/CodeGen/AMDGPU/scalar-store-cache-flush.mir =================================================================== --- test/CodeGen/AMDGPU/scalar-store-cache-flush.mir +++ test/CodeGen/AMDGPU/scalar-store-cache-flush.mir @@ -49,7 +49,7 @@ # CHECK: bb.0: # CHECK-NEXT: S_STORE_DWORD # CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM +# CHECK-NEXT: S_ENDPGM 0 name: basic_insert_dcache_wb tracksRegLiveness: false @@ -57,7 +57,7 @@ body: | bb.0: S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # Already has an explicitly requested flush after the last store. 
@@ -65,7 +65,7 @@ # CHECK: bb.0: # CHECK-NEXT: S_STORE_DWORD # CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM +# CHECK-NEXT: S_ENDPGM 0 name: explicit_flush_after tracksRegLiveness: false @@ -74,7 +74,7 @@ bb.0: S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0 S_DCACHE_WB - S_ENDPGM + S_ENDPGM 0 ... --- # Already has an explicitly requested flush before the last store. @@ -83,7 +83,7 @@ # CHECK-NEXT: S_DCACHE_WB # CHECK-NEXT: S_STORE_DWORD # CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM +# CHECK-NEXT: S_ENDPGM 0 name: explicit_flush_before tracksRegLiveness: false @@ -92,30 +92,30 @@ bb.0: S_DCACHE_WB S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0 - S_ENDPGM + S_ENDPGM 0 ... --- # CHECK-LABEL: no_scalar_store # CHECK: bb.0 -# CHECK-NEXT: S_ENDPGM +# CHECK-NEXT: S_ENDPGM 0 name: no_scalar_store tracksRegLiveness: false body: | bb.0: - S_ENDPGM + S_ENDPGM 0 ... # CHECK-LABEL: name: multi_block_store # CHECK: bb.0: # CHECK-NEXT: S_STORE_DWORD # CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM +# CHECK-NEXT: S_ENDPGM 0 # CHECK: bb.1: # CHECK-NEXT: S_STORE_DWORD # CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM +# CHECK-NEXT: S_ENDPGM 0 name: multi_block_store tracksRegLiveness: false @@ -123,11 +123,11 @@ body: | bb.0: S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0 - S_ENDPGM + S_ENDPGM 0 bb.1: S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0 - S_ENDPGM + S_ENDPGM 0 ... ... @@ -137,23 +137,23 @@ # CHECK-LABEL: name: one_block_store # CHECK: bb.0: # CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM +# CHECK-NEXT: S_ENDPGM 0 # CHECK: bb.1: # CHECK-NEXT: S_STORE_DWORD # CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM +# CHECK-NEXT: S_ENDPGM 0 name: one_block_store tracksRegLiveness: false body: | bb.0: - S_ENDPGM + S_ENDPGM 0 bb.1: S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0 - S_ENDPGM + S_ENDPGM 0 ... 
--- # CHECK-LABEL: name: si_return Index: test/CodeGen/AMDGPU/sched-crash-dbg-value.mir =================================================================== --- test/CodeGen/AMDGPU/sched-crash-dbg-value.mir +++ test/CodeGen/AMDGPU/sched-crash-dbg-value.mir @@ -329,6 +329,6 @@ dead $sgpr30_sgpr31 = SI_CALL %127, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3 ADJCALLSTACKDOWN 0, 0, implicit-def $sgpr32, implicit $sgpr32 %128:vreg_64, dead %129:sreg_64 = V_MAD_I64_I32 %20, %34, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/schedule-regpressure.mir =================================================================== --- test/CodeGen/AMDGPU/schedule-regpressure.mir +++ test/CodeGen/AMDGPU/schedule-regpressure.mir @@ -52,6 +52,6 @@ %7 = COPY %5 %6 = DS_READ_B32 %7, 0, 0, implicit $m0, implicit $exec DS_WRITE_B32 %7, %6, 4, 0, implicit killed $m0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/sdwa-preserve.mir =================================================================== --- test/CodeGen/AMDGPU/sdwa-preserve.mir +++ test/CodeGen/AMDGPU/sdwa-preserve.mir @@ -96,7 +96,7 @@ %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0) FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) - S_ENDPGM + S_ENDPGM 0 ... --- @@ -139,6 +139,6 @@ %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0) FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/sdwa-scalar-ops.mir =================================================================== --- test/CodeGen/AMDGPU/sdwa-scalar-ops.mir +++ test/CodeGen/AMDGPU/sdwa-scalar-ops.mir @@ -210,7 +210,7 @@ S_BRANCH %bb.2.bb2 bb.1.bb1: - S_ENDPGM + S_ENDPGM 0 bb.2.bb2: successors: %bb.1.bb1(0x04000000), %bb.2.bb2(0x7c000000) @@ -373,7 +373,7 @@ S_BRANCH %bb.2.bb2 bb.1.bb1: - S_ENDPGM + S_ENDPGM 0 bb.2.bb2: successors: %bb.1.bb1(0x04000000), %bb.2.bb2(0x7c000000) Index: test/CodeGen/AMDGPU/sendmsg-m0-hazard.mir =================================================================== --- test/CodeGen/AMDGPU/sendmsg-m0-hazard.mir +++ test/CodeGen/AMDGPU/sendmsg-m0-hazard.mir @@ -15,7 +15,7 @@ bb.0: $m0 = S_MOV_B32 -1 S_SENDMSG 3, implicit $exec, implicit $m0 - S_ENDPGM + S_ENDPGM 0 ... --- @@ -30,7 +30,7 @@ bb.0: $m0 = S_MOV_B32 -1 S_SENDMSGHALT 3, implicit $exec, implicit $m0 - S_ENDPGM + S_ENDPGM 0 ... --- @@ -45,5 +45,5 @@ bb.0: $m0 = S_MOV_B32 -1 S_TTRACEDATA implicit $m0 - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir =================================================================== --- test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir +++ test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir @@ -86,7 +86,7 @@ %29, %9 = V_ADD_I32_e64 %19, %17, implicit $exec %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -170,7 +170,7 @@ %29, %9 = V_SUB_I32_e64 %19, %17, implicit $exec %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -254,7 +254,7 @@ %29, %9 = V_SUBREV_I32_e64 %19, %17, implicit $exec %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -338,7 +338,7 @@ %29, $vcc = V_ADDC_U32_e64 %19, %17, %9, implicit $exec %24 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -423,7 +423,7 @@ %29, $vcc = V_ADDC_U32_e64 %19, %17, $vcc, implicit $exec %24 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... @@ -507,6 +507,6 @@ %29, $vcc = V_ADDC_U32_e64 %19, %17, undef $vcc, implicit $exec %24 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir =================================================================== --- test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir +++ test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir @@ -42,7 +42,7 @@ # GCN-LABEL: name: dead_illegal_virtreg_copy # GCN: %0:vgpr_32 = COPY $vgpr0 # GCN: %1:sreg_32_xm0 = IMPLICIT_DEF -# GCN: S_ENDPGM implicit %0 +# GCN: S_ENDPGM 0, implicit %0 name: dead_illegal_virtreg_copy tracksRegLiveness: true @@ -52,7 +52,7 @@ liveins: $vgpr0 %0:vgpr_32 = COPY $vgpr0 %1:sreg_32_xm0 = COPY %0 - S_ENDPGM implicit %1 + S_ENDPGM 0, implicit %1 ... --- @@ -60,7 +60,7 @@ # GCN-LABEL: name: dead_illegal_physreg_copy # GCN %2:vgpr_32 = COPY $vgpr0 # GCN: %1:sreg_32_xm0 = IMPLICIT_DEF -# GCN: S_ENDPGM implicit %2 +# GCN: S_ENDPGM 0, implicit %2 name: dead_illegal_physreg_copy tracksRegLiveness: true @@ -70,5 +70,5 @@ liveins: $vgpr0 %0:sreg_32_xm0 = COPY $vgpr0 %1:sreg_32_xm0 = COPY %0 - S_ENDPGM implicit %1 + S_ENDPGM 0, implicit %1 ... 
Index: test/CodeGen/AMDGPU/si-lower-control-flow.mir =================================================================== --- test/CodeGen/AMDGPU/si-lower-control-flow.mir +++ test/CodeGen/AMDGPU/si-lower-control-flow.mir @@ -13,12 +13,12 @@ ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0 ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[S_LOAD_DWORD_IMM]], 255, implicit-def $scc ; GCN: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc - ; GCN: S_ENDPGM + ; GCN: S_ENDPGM 0 %0:sgpr_64 = COPY $sgpr4_sgpr5 %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 %2:sreg_32_xm0 = S_AND_B32 %1, 255, implicit-def $scc %3:sreg_32_xm0 = S_AND_B32 65535, %2, implicit-def $scc - S_ENDPGM + S_ENDPGM 0 ... --- @@ -37,7 +37,7 @@ ; GCN: bb.1: ; GCN: successors: %bb.2(0x80000000) ; GCN: bb.2: - ; GCN: S_ENDPGM + ; GCN: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.2 @@ -48,6 +48,6 @@ successors: %bb.2 bb.2: - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir =================================================================== --- test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir +++ test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir @@ -40,7 +40,7 @@ %8:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %3, 640, 0 :: (dereferenceable invariant load 8) undef %9.sub0:vreg_128 = V_LSHL_ADD_U32 %6, 4, %4, implicit $exec %9.sub1:vreg_128 = V_LSHL_ADD_U32 %5, 4, %0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/subreg-split-live-in-error.mir =================================================================== --- test/CodeGen/AMDGPU/subreg-split-live-in-error.mir +++ test/CodeGen/AMDGPU/subreg-split-live-in-error.mir @@ -278,5 +278,5 @@ %68:vgpr_32 = V_MUL_F32_e32 0, %4, implicit $exec %69:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, undef %70:vgpr_32, 0, %68, 0, 0, implicit $exec EXP 0, undef %71:vgpr_32, %69, undef %72:vgpr_32, undef %73:vgpr_32, -1, -1, 15, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir =================================================================== --- test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir +++ test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir @@ -74,7 +74,7 @@ liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, $sgpr4_sgpr5 $vcc = COPY $vgpr1 - S_ENDPGM + S_ENDPGM 0 ... --- @@ -138,6 +138,6 @@ liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, $sgpr4_sgpr5 $vcc = COPY $vgpr1 - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/v_swap_b32.mir =================================================================== --- test/CodeGen/AMDGPU/v_swap_b32.mir +++ test/CodeGen/AMDGPU/v_swap_b32.mir @@ -93,7 +93,7 @@ $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 0, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: swap_virt_copy_condense @@ -176,7 +176,7 @@ # GCN-NEXT: %3:vgpr_32 = COPY %0 # GCN-NEXT: %0:vgpr_32 = COPY %1 # GCN-NEXT: %1:vgpr_32 = COPY %2 -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 --- name: swap_virt_read_x @@ -193,7 +193,7 @@ %3 = COPY %0 %0 = COPY %1 %1 = COPY %2 - S_ENDPGM + S_ENDPGM 0 ... 
# GCN-LABEL: name: swap_virt_read_t_twice @@ -203,7 +203,7 @@ # GCN-NEXT: %2:vgpr_32 = COPY %0 # GCN-NEXT: %3:vgpr_32 = COPY %2 # GCN-NEXT: %0:vgpr_32, %1:vgpr_32 = V_SWAP_B32 %1, %0, implicit $exec -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 --- name: swap_virt_read_t_twice @@ -220,7 +220,7 @@ %3 = COPY %2 %0 = COPY %1 %1 = COPY %2 - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: swap_virt_clobber_y @@ -231,7 +231,7 @@ # GCN-NEXT: %0:vgpr_32 = COPY %1 # GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF # GCN-NEXT: %1:vgpr_32 = COPY %2 -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 --- name: swap_virt_clobber_y @@ -247,7 +247,7 @@ %0 = COPY %1 %1 = IMPLICIT_DEF %1 = COPY %2 - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: swap_virt_clobber_x1 @@ -258,7 +258,7 @@ # GCN-NEXT: %0:vgpr_32 = COPY %1 # GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF # GCN-NEXT: %1:vgpr_32 = COPY %2 -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 --- name: swap_virt_clobber_x1 @@ -274,7 +274,7 @@ %0 = COPY %1 %0 = IMPLICIT_DEF %1 = COPY %2 - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: swap_virt_clobber_x2 @@ -285,7 +285,7 @@ # GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF # GCN-NEXT: %0:vgpr_32 = COPY %1 # GCN-NEXT: %1:vgpr_32 = COPY %2 -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 --- name: swap_virt_clobber_x2 @@ -301,7 +301,7 @@ %0 = IMPLICIT_DEF %0 = COPY %1 %1 = COPY %2 - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: swap_virt_clobber_t @@ -312,7 +312,7 @@ # GCN-NEXT: %0:vgpr_32 = COPY %1 # GCN-NEXT: %2:vgpr_32 = IMPLICIT_DEF # GCN-NEXT: %1:vgpr_32 = COPY %2 -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 --- name: swap_virt_clobber_t @@ -328,7 +328,7 @@ %0 = COPY %1 %2 = IMPLICIT_DEF %1 = COPY %2 - S_ENDPGM + S_ENDPGM 0 ... 
# GCN-LABEL: name: swap_virt_copy_subreg_overlap_x_full @@ -453,7 +453,7 @@ # GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF # GCN-NEXT: %0.sub0:vreg_128, %1.sub0:vreg_128 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec # GCN-NEXT: %0.sub1:vreg_128, %1.sub1:vreg_128 = V_SWAP_B32 %1.sub1, %0.sub1, implicit $exec -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 --- name: swap_virt_b128_sub0_1 registers: @@ -467,7 +467,7 @@ %2.sub0_sub1 = COPY %0.sub0_sub1 %0.sub0_sub1 = COPY %1.sub0_sub1 %1.sub0_sub1 = COPY %2.sub0_sub1 - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: swap_virt_b128_sub2_3 @@ -476,7 +476,7 @@ # GCN-NEXT: %1:vreg_128 = IMPLICIT_DEF # GCN-NEXT: %0.sub2:vreg_128, %1.sub2:vreg_128 = V_SWAP_B32 %1.sub2, %0.sub2, implicit $exec # GCN-NEXT: %0.sub3:vreg_128, %1.sub3:vreg_128 = V_SWAP_B32 %1.sub3, %0.sub3, implicit $exec -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 --- name: swap_virt_b128_sub2_3 registers: @@ -490,7 +490,7 @@ %2.sub2_sub3 = COPY %0.sub2_sub3 %0.sub2_sub3 = COPY %1.sub2_sub3 %1.sub2_sub3 = COPY %2.sub2_sub3 - S_ENDPGM + S_ENDPGM 0 ... @@ -544,7 +544,7 @@ # GCN-NEXT: %0.sub0:vreg_64 = COPY %1.sub0, implicit %0 # GCN-NEXT: %0.sub1:vreg_64 = COPY %1.sub1 # GCN-NEXT: %1.sub0:vreg_64 = COPY %2.sub0 -# GCN-NEXT: S_ENDPGM +# GCN-NEXT: S_ENDPGM 0 --- name: swap_virt_copy_subreg_impuse_x registers: @@ -560,5 +560,5 @@ %0.sub0 = COPY %1.sub0, implicit %0 %0.sub1 = COPY %1.sub1 %1.sub0 = COPY %2.sub0 - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir =================================================================== --- test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir +++ test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir @@ -105,7 +105,7 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) - S_ENDPGM + S_ENDPGM 0 ... 
--- @@ -166,6 +166,6 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/vop-shrink-frame-index.mir =================================================================== --- test/CodeGen/AMDGPU/vop-shrink-frame-index.mir +++ test/CodeGen/AMDGPU/vop-shrink-frame-index.mir @@ -51,7 +51,7 @@ %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = IMPLICIT_DEF %2, $vcc = V_ADD_I32_e64 %0, %1, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: fold_vgpr_fi{{$}} @@ -72,7 +72,7 @@ %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = IMPLICIT_DEF %2, $vcc = V_ADD_I32_e64 %1, %0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: fold_sgpr_fi{{$}} @@ -94,7 +94,7 @@ %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = IMPLICIT_DEF %2, $vcc = V_ADD_I32_e64 %1, %0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: fold_fi_sgpr{{$}} @@ -116,7 +116,7 @@ %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = IMPLICIT_DEF %2, $vcc = V_ADD_I32_e64 %0, %1, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... # TODO: Should probably prefer folding immediate first # GCN-LABEL: name: fold_fi_imm{{$}} @@ -137,7 +137,7 @@ %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = V_MOV_B32_e32 999, implicit $exec %2, $vcc = V_ADD_I32_e64 %0, %1, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... 
# GCN-LABEL: name: fold_imm_fi{{$}} @@ -158,4 +158,4 @@ %0 = V_MOV_B32_e32 %stack.0.alloca, implicit $exec %1 = V_MOV_B32_e32 999, implicit $exec %2, $vcc = V_ADD_I32_e64 %1, %0, implicit $exec - S_ENDPGM + S_ENDPGM 0 Index: test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir =================================================================== --- test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir +++ test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir @@ -17,7 +17,7 @@ %0 = V_MOV_B32_e32 123, implicit $exec %1 = V_MOV_B32_e32 456, implicit $exec %2, $vcc = V_ADD_I32_e64 %0, %1, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... # GCN-LABEL: name: fold_partially_defined_superreg{{$}} @@ -35,6 +35,6 @@ undef %3.sub0 = V_MOV_B32_e32 123, implicit $exec, implicit-def %3 %1 = V_MOV_B32_e32 456, implicit $exec %2, $vcc = V_ADD_I32_e64 %3.sub0, %1, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir =================================================================== --- test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir +++ test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir @@ -55,7 +55,7 @@ $vgpr4 = V_MAC_F32_e32 killed $vgpr0, killed $vgpr3, killed $vgpr4, implicit $exec EXP_DONE 12, killed $vgpr4, undef $vgpr0, undef $vgpr0, undef $vgpr0, 0, 0, 15, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- @@ -89,5 +89,5 @@ bb.6: S_CBRANCH_SCC1 %bb.0, implicit $scc - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir =================================================================== --- test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir +++ test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir @@ -48,7 +48,7 @@ bb.4: - S_ENDPGM + S_ENDPGM 0 ... @@ -67,7 +67,7 @@ # GCN: BUFFER_ATOMIC_ADD_OFFSET_RTN # GCN: S_WAITCNT 3952 # GCN: FLAT_STORE_DWORD -# GCN: S_ENDPGM +# GCN: S_ENDPGM 0 name: irreducible_loop_extended body: | @@ -99,5 +99,5 @@ bb.6: FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir =================================================================== --- test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir +++ test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir @@ -22,5 +22,5 @@ $vgpr11 = GLOBAL_LOAD_DWORD $vgpr11_vgpr12, 0, 0, 0, implicit $exec S_CBRANCH_SCC1 %bb.1, implicit $scc bb.2: - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/waitcnt-preexisting.mir =================================================================== --- test/CodeGen/AMDGPU/waitcnt-preexisting.mir +++ test/CodeGen/AMDGPU/waitcnt-preexisting.mir @@ -33,5 +33,5 @@ $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $exec $vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $exec IMAGE_STORE_V4_V2 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/AMDGPU/waitcnt.mir =================================================================== --- test/CodeGen/AMDGPU/waitcnt.mir +++ test/CodeGen/AMDGPU/waitcnt.mir @@ -60,7 +60,7 @@ $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4) $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16) $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- # There is only a single fallthrough successor block, so there's no @@ -84,7 +84,7 @@ bb.1: $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... 
--- # The block has a single predecessor with a single successor, but it @@ -95,7 +95,7 @@ # CHECK: bb.1 # CHECK-NEXT: FLAT_STORE_DWORD -# CHECK-NEXT: S_ENDPGM +# CHECK-NEXT: S_ENDPGM 0 # CHECK: bb.2: # CHECK-NEXT: V_LSHLREV_B64 @@ -111,10 +111,10 @@ bb.1: FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 bb.2: $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/MIR/AMDGPU/expected-target-index-name.mir =================================================================== --- test/CodeGen/MIR/AMDGPU/expected-target-index-name.mir +++ test/CodeGen/MIR/AMDGPU/expected-target-index-name.mir @@ -45,5 +45,5 @@ $sgpr6 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/MIR/AMDGPU/invalid-target-index-operand.mir =================================================================== --- test/CodeGen/MIR/AMDGPU/invalid-target-index-operand.mir +++ test/CodeGen/MIR/AMDGPU/invalid-target-index-operand.mir @@ -45,5 +45,5 @@ $sgpr6 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir =================================================================== --- test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir +++ test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir @@ -23,6 +23,6 @@ %16:sgpr_128 = REG_SEQUENCE killed %vreg123_0, %subreg.sub0, %vreg123_1, %subreg.sub1, %vreg123_2, %subreg.sub2, %vreg123_3, %subreg.sub3 BUFFER_STORE_DWORD_ADDR64 %vreg123_1, %27, killed %16, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/MIR/AMDGPU/stack-id.mir =================================================================== --- test/CodeGen/MIR/AMDGPU/stack-id.mir +++ test/CodeGen/MIR/AMDGPU/stack-id.mir @@ -31,5 +31,5 @@ body: | bb.0: - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/MIR/AMDGPU/syncscopes.mir =================================================================== --- test/CodeGen/MIR/AMDGPU/syncscopes.mir +++ test/CodeGen/MIR/AMDGPU/syncscopes.mir @@ -95,6 +95,6 @@ $vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit killed $sgpr4_sgpr5, implicit $sgpr4_sgpr5, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr8, implicit $exec, implicit $exec FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out) - S_ENDPGM + S_ENDPGM 0 ... Index: test/CodeGen/MIR/AMDGPU/target-flags.mir =================================================================== --- test/CodeGen/MIR/AMDGPU/target-flags.mir +++ test/CodeGen/MIR/AMDGPU/target-flags.mir @@ -24,9 +24,9 @@ ; CHECK-LABEL: name: flags ; CHECK: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc ; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo - ; CHECK: S_ENDPGM + ; CHECK: S_ENDPGM 0 %0 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc %1 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo - S_ENDPGM + S_ENDPGM 0 ... 
Index: test/CodeGen/MIR/AMDGPU/target-index-operands.mir =================================================================== --- test/CodeGen/MIR/AMDGPU/target-index-operands.mir +++ test/CodeGen/MIR/AMDGPU/target-index-operands.mir @@ -53,7 +53,7 @@ $sgpr6 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... --- name: float2 @@ -83,5 +83,5 @@ $sgpr6 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec - S_ENDPGM + S_ENDPGM 0 ... Index: test/MC/AMDGPU/s_endpgm.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/s_endpgm.s @@ -0,0 +1,17 @@ +// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -filetype=obj %s | llvm-objcopy -S -K keep_symbol - | llvm-objdump -disassemble -mcpu=gfx900 - | FileCheck %s --check-prefix=BIN + +// GCN: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +// BIN: s_endpgm // 000000000000: BF810000 +s_endpgm + +// GCN: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] +// BIN: s_endpgm // 000000000004: BF810000 +s_endpgm 0 + + +// GCN: s_endpgm 1 ; encoding: [0x01,0x00,0x81,0xbf] +// BIN: s_endpgm 1 // 000000000008: BF810001 +s_endpgm 1 + + Index: test/MachineVerifier/verifier-implicit-virtreg-invalid-physreg-liveness.mir =================================================================== --- test/MachineVerifier/verifier-implicit-virtreg-invalid-physreg-liveness.mir +++ test/MachineVerifier/verifier-implicit-virtreg-invalid-physreg-liveness.mir @@ -5,8 +5,8 @@ # ERROR: *** Bad machine code: Using an undefined physical register *** -# ERROR: instruction: S_ENDPGM implicit %0:vgpr_32, implicit $vcc -# ERROR: operand 1: implicit $vcc +# ERROR: instruction: S_ENDPGM 0, implicit %0:vgpr_32, 
implicit $vcc +# ERROR: operand 2: implicit $vcc ... @@ -16,7 +16,7 @@ body: | bb.0: %0:vgpr_32 = IMPLICIT_DEF - S_ENDPGM implicit %0, implicit $vcc + S_ENDPGM 0, implicit %0, implicit $vcc ...