diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp @@ -76,7 +76,7 @@ // Don't form VALU clauses. It's not clear what benefit they give, if any. - // In practice s_nop is the only internal instructions we're likely to see. + // In practice s_nop is the only internal instruction we're likely to see. // It's safe to treat the rest as illegal. if (MI.getOpcode() == AMDGPU::S_NOP) return HARDCLAUSE_INTERNAL; @@ -103,23 +103,25 @@ // The last non-internal instruction in the clause. MachineInstr *Last = nullptr; // The length of the clause including any internal instructions in the - // middle. + // middle or after the end of the clause. unsigned Length = 0; // The base operands of *Last. SmallVector BaseOps; }; bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { - assert(CI.Length == - std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1); - if (CI.Length < 2) + // Get the size of the clause excluding any internal instructions at the + // end. + unsigned Size = + std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1; + if (Size < 2) return false; - assert(CI.Length <= 64 && "Hard clause is too long!"); + assert(Size <= 64 && "Hard clause is too long!"); auto &MBB = *CI.First->getParent(); auto ClauseMI = BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) - .addImm(CI.Length - 1); + .addImm(Size - 1); finalizeBundle(MBB, ClauseMI->getIterator(), std::next(CI.Last->getIterator())); return true; diff --git a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir --- a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir +++ b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir @@ -1,6 +1,39 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-hard-clauses %s -o - | FileCheck %s +--- +name: nop1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: nop1 + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + ; CHECK: S_NOP 2 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + S_NOP 2 +... + +--- +name: nop2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: nop2 + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1 { + ; CHECK: S_CLAUSE 2 + ; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + ; CHECK: S_NOP 2 + ; CHECK: $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0, 0 + ; CHECK: } + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + S_NOP 2 + $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0, 0 +... + --- name: long_clause tracksRegLiveness: true