diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -184,6 +184,16 @@ if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And)) return false; + // Cannot safely mirror live intervals with PHI nodes, so check for these + // before optimization. + SlotIndex SelIdx = LIS->getInstructionIndex(*Sel); + LiveInterval *SelLI = &LIS->getInterval(SelReg); + if (llvm::any_of(SelLI->vnis(), + [](const VNInfo *VNI) { + return VNI->isPHIDef(); + })) + return false; + // TODO: Guard against implicit def operands? LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t' << *And); @@ -204,31 +214,34 @@ LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n'); - SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp); - SlotIndex SelIdx = LIS->getInstructionIndex(*Sel); - - LiveInterval *CmpLI = - CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr; - LiveInterval *SelLI = - SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr; - // Update live intervals for CCReg before potentially removing CmpReg/SelReg, // and their associated liveness information. + SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp); if (CCReg.isVirtual()) { - // Note: this ignores that SelLI might have multiple internal values - // or splits and simply extends the live range to cover all cases - // where the result of the v_cndmask_b32 was live (e.g. loops). - // This could yield worse register allocation in rare edge cases. - SlotIndex EndIdx = AndIdx.getRegSlot(); - if (SelLI && SelLI->endIndex() > EndIdx && SelLI->endIndex().isBlock()) - EndIdx = SelLI->endIndex(); + // Apply live ranges from SelLI to CCReg potentially matching splits + // and extending to loop boundaries. 
+    auto applyLiveRanges = [&](LiveRange &Dst, VNInfo *VNI) {
+      // Copy SelLI segments from Sel's def on; non-block ends clamp to AndIdx.
+      auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot());
+      assert(DefSegment != SelLI->end() &&
+             "No live interval segment covering definition?");
+      for (auto I = DefSegment; I != SelLI->end(); ++I) {
+        SlotIndex Start = I->start < SelIdx.getRegSlot() ?
+                          SelIdx.getRegSlot() : I->start;
+        SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ?
+                        I->end : AndIdx.getRegSlot();
+        if (Start < End) // Skip segments left empty by the clamping above.
+          Dst.addSegment(LiveRange::Segment(Start, End, VNI));
+      }
+      // If SelLI does not cover AndIdx (because Cmp killed Sel) then extend.
+      if (!SelLI->getSegmentContaining(AndIdx.getRegSlot()))
+        Dst.addSegment(LiveRange::Segment(CmpIdx.getRegSlot(), AndIdx.getRegSlot(), VNI));
+    };
 
     LiveInterval &CCLI = LIS->getInterval(CCReg);
     auto CCQ = CCLI.Query(SelIdx.getRegSlot());
-    if (CCQ.valueIn()) {
-      CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(),
-                                         EndIdx, CCQ.valueIn()));
-    }
+    if (CCQ.valueIn())
+      applyLiveRanges(CCLI, CCQ.valueIn());
 
     if (CC->getSubReg()) {
       LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg());
@@ -237,10 +250,8 @@
           Allocator, Mask,
           [=](LiveInterval::SubRange &SR) {
             auto CCQS = SR.Query(SelIdx.getRegSlot());
-            if (CCQS.valueIn()) {
-              SR.addSegment(LiveRange::Segment(
-                  SelIdx.getRegSlot(), EndIdx, CCQS.valueIn()));
-            }
+            if (CCQS.valueIn())
+              applyLiveRanges(SR, CCQS.valueIn());
           },
           *LIS->getSlotIndexes(), *TRI);
       CCLI.removeEmptySubRanges();
@@ -253,7 +264,8 @@
 
   // Try to remove compare. Cmp value should not used in between of cmp
   // and s_and_b64 if VCC or just unused if any other register.
-  if ((CmpReg.isVirtual() && CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
+  LiveInterval *CmpLI = CmpReg.isVirtual() ?
&LIS->getInterval(CmpReg) : nullptr; + if ((CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) || (CmpReg == Register(CondReg) && std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(), [&](const MachineInstr &MI) { @@ -266,18 +278,16 @@ Cmp->eraseFromParent(); // Try to remove v_cndmask_b32. - if (SelLI) { - // Kill status must be checked before shrinking the live range. - bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill(); - LIS->shrinkToUses(SelLI); - bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef(); - if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) { - LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n'); - - LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot()); - LIS->RemoveMachineInstrFromMaps(*Sel); - Sel->eraseFromParent(); - } + // Kill status must be checked before shrinking the live range. + bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill(); + LIS->shrinkToUses(SelLI); + bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef(); + if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) { + LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n'); + + LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot()); + LIS->RemoveMachineInstrFromMaps(*Sel); + Sel->eraseFromParent(); } } diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-alloc-failure.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-alloc-failure.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-alloc-failure.mir @@ -0,0 +1,293 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -start-before=machine-scheduler -stop-after=virtregrewriter,0 -o - %s | FileCheck %s + +--- +# If optimize-exec-mask-pre-ra over approximates live intervals (not replicating splits) +# then this triggers a register allocation failure. 
+ +# CHECK-LABEL: name: test + +name: test +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_32, preferred-register: '$vcc_lo' } + - { id: 1, class: sreg_32, preferred-register: '$vcc_lo' } + - { id: 2, class: sreg_32_xm0_xexec, preferred-register: '$vcc_lo' } +liveins: + - { reg: '$sgpr4_sgpr5', virtual-reg: '%3' } +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr100_sgpr101_sgpr102_sgpr103' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0.entry: + liveins: $vgpr0, $sgpr4_sgpr5 + + %3:sgpr_64 = COPY $sgpr4_sgpr5 + %4:vgpr_32 = COPY $vgpr0 + %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 16, 0 + S_BITCMP1_B32 %5, 0, implicit-def $scc + %6:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc + %7:sreg_32 = S_MOV_B32 -1 + %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 136, 0 + S_CBRANCH_SCC1 %bb.2, implicit undef $scc + S_BRANCH %bb.1 + + bb.1: + %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 8, 0 + %10:sgpr_128 = S_LOAD_DWORDX4_IMM %3, 24, 0 + %11:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 40, 0 + %12:sgpr_128 = S_LOAD_DWORDX4_IMM %3, 48, 0 + %13:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 64, 0 + S_BITCMP1_B32 %11, 0, implicit-def $scc + %14:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc + S_BITCMP1_B32 %13, 0, implicit-def $scc + %15:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc + S_BITCMP1_B32 %13, 8, implicit-def $scc + %2:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc + %16:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 72, 0 + %17:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 80, 0 + S_BITCMP1_B32 %17, 0, implicit-def $scc + %18:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc + S_BITCMP1_B32 %17, 8, implicit-def $scc + %19:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc + %20:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 88, 0 + %21:sgpr_128 = S_LOAD_DWORDX4_IMM %3, 104, 0 + %22:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 120, 0 + S_BITCMP1_B32 %22, 0, implicit-def $scc + 
%23:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc + %24:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 128, 0 + %25:sreg_64 = S_MOV_B64 0 + %26:sreg_64_xexec = S_LOAD_DWORDX2_IMM %25, 0, 0 + %27:sreg_64 = S_MOV_B64_IMM_PSEUDO 4652218415073722368 + %28:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec + %29:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %14, implicit $exec + %30:sreg_64 = S_MOV_B64_IMM_PSEUDO 4358002977218854975 + undef %31.sub1:sreg_64 = S_MOV_B32 -1064252416 + %32:sreg_32 = S_OR_B32 %19, %18, implicit-def dead $scc + undef %33.sub1:sreg_64 = S_MOV_B32 2146435072 + %34:sreg_64 = S_MOV_B64_IMM_PSEUDO 4592094252754343337 + %35:sreg_64 = S_MOV_B64_IMM_PSEUDO 4593089322246397463 + %36:sreg_64 = S_MOV_B64_IMM_PSEUDO 4593150332132823898 + %37:sreg_64 = S_MOV_B64_IMM_PSEUDO 4593971714784152002 + %38:sreg_64 = S_MOV_B64_IMM_PSEUDO 4594710915293070409 + %39:sreg_64 = S_MOV_B64_IMM_PSEUDO 4595718710613720112 + %40:sreg_64 = S_MOV_B64_IMM_PSEUDO 4597174419628462798 + %41:sreg_64 = S_MOV_B64_IMM_PSEUDO 4598818590920614106 + %42:sreg_64 = S_MOV_B64_IMM_PSEUDO 4600877379321698716 + %43:sreg_64 = S_MOV_B64_IMM_PSEUDO 4604180019048437077 + undef %44.sub1:sreg_64 = S_MOV_B32 -1075489451 + %45:sreg_64 = S_MOV_B64_IMM_PSEUDO 4609176140021203710 + undef %46.sub1:sreg_64 = S_MOV_B32 -1132807010 + %47:sreg_64 = S_MOV_B64_IMM_PSEUDO 4508818957471820556 + %48:sreg_64 = S_MOV_B64_IMM_PSEUDO 4493147761815702327 + %49:sreg_64 = S_MOV_B64_IMM_PSEUDO 4523617260404727396 + %50:sreg_64 = S_MOV_B64_IMM_PSEUDO 4537941333260232368 + %51:sreg_64 = S_MOV_B64_IMM_PSEUDO 4551452160460988270 + %52:sreg_64 = S_MOV_B64_IMM_PSEUDO 4564047942395279280 + %53:sreg_64 = S_MOV_B64_IMM_PSEUDO 4575957461383652130 + %54:sreg_64 = S_MOV_B64_IMM_PSEUDO 4586165620538933921 + %55:sreg_64 = S_MOV_B64_IMM_PSEUDO 4595172819793696017 + %56:sreg_64 = S_MOV_B64_IMM_PSEUDO 4602678819172646923 + undef %57.sub1:sreg_64 = S_MOV_B32 -1101341163 + %7:sreg_32 = IMPLICIT_DEF + %58:sreg_32 = IMPLICIT_DEF + %59:sreg_32 = 
COPY %27.sub0 + %60:vreg_64 = COPY %28 + %61:vreg_64 = COPY %28 + %62:vreg_64 = COPY %28 + %63:vreg_64 = COPY %28 + %64:vreg_64 = COPY %28 + S_BRANCH %bb.3 + + bb.2: + %65:sreg_32 = COPY $exec_lo, implicit-def $exec_lo + %66:sreg_32 = S_AND_B32 %65, %7, implicit-def dead $scc + $exec_lo = S_MOV_B32_term %66 + S_CBRANCH_EXECZ %bb.18, implicit $exec + S_BRANCH %bb.5 + + bb.3: + %67:sreg_32 = S_AND_B32 $exec_lo, %6, implicit-def dead $scc + $vcc_lo = COPY %67 + %58:sreg_32 = S_OR_B32 %58, $exec_lo, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.7, implicit killed $vcc + + bb.4: + %64:vreg_64 = IMPLICIT_DEF + %63:vreg_64 = IMPLICIT_DEF + %62:vreg_64 = IMPLICIT_DEF + %61:vreg_64 = IMPLICIT_DEF + %60:vreg_64 = IMPLICIT_DEF + %28:vreg_64 = IMPLICIT_DEF + %68:sreg_32 = S_MOV_B32 -1 + S_BRANCH %bb.9 + + bb.5: + S_CBRANCH_SCC1 %bb.18, implicit undef $scc + + bb.6: + %69:sreg_32_xm0_xexec = S_XOR_B32 %6, -1, implicit-def dead $scc + %70:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %3, 96, 0 + S_BITCMP1_B32 %70, 0, implicit-def $scc + %71:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc + S_BITCMP1_B32 %8.sub1, 0, implicit-def $scc + %72:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed $scc + %73:sreg_32_xm0_xexec = S_XOR_B32 %72, -1, implicit-def dead $scc + %74:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %73, implicit $exec + %75:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %71, implicit $exec + %76:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %69, implicit $exec + S_BRANCH %bb.14 + + bb.7: + %77:vreg_64 = COPY %10.sub0_sub1 + %78:vreg_64 = FLAT_LOAD_DWORDX2 %77, 0, 0, implicit $exec, implicit $flat_scr + %79:vreg_64 = COPY %10.sub2_sub3 + %80:vreg_64 = FLAT_LOAD_DWORDX2 %79, 0, 0, implicit $exec, implicit $flat_scr + %81:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %29, implicit $exec + $vcc_lo = S_AND_B32 $exec_lo, %81, implicit-def dead $scc + %82:sreg_64 = COPY %12.sub0_sub1 + S_CBRANCH_VCCNZ %bb.10, implicit killed $vcc + S_BRANCH %bb.8 + + bb.8: + %82:sreg_64 = S_MOV_B64 0 + 
S_BRANCH %bb.10 + + bb.9: + %83:sreg_32 = S_XOR_B32 %68, -1, implicit-def dead $scc + %84:sreg_32 = S_AND_B32 $exec_lo, %58, implicit-def $scc + %59:sreg_32 = S_OR_B32 %84, %59, implicit-def $scc + %4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %85:sreg_32 = S_ANDN2_B32 %7, $exec_lo, implicit-def dead $scc + %86:sreg_32 = S_AND_B32 %83, $exec_lo, implicit-def dead $scc + %7:sreg_32 = S_OR_B32 %85, %86, implicit-def dead $scc + $exec_lo = S_ANDN2_B32_term $exec_lo, %59, implicit-def $scc + S_CBRANCH_EXECNZ %bb.3, implicit $exec + S_BRANCH %bb.19 + + bb.10: + %87:sreg_64_xexec = S_LOAD_DWORDX2_IMM %26, 16, 0 + undef %88.sub1:sreg_64 = S_AND_B32 %87.sub1, 2147483647, implicit-def dead $scc + %89:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e64 2, %87, 0, 0, implicit $mode, implicit $exec + %90:vreg_64 = V_CVT_F64_I32_e32 %89, implicit $mode, implicit $exec + %91:vreg_64 = nofpexcept V_FMA_F64_e64 0, 0, 0, %30, 0, %87, 0, 0, implicit $mode, implicit $exec + %0:sreg_32 = nofpexcept V_CMP_LT_F64_e64 0, %27, 0, %91, 0, implicit $mode, implicit $exec + %31.sub0:sreg_64 = COPY %27.sub0 + %1:sreg_32 = nofpexcept V_CMP_GT_F64_e64 0, %31, 0, %90, 0, implicit $mode, implicit $exec + S_CBRANCH_SCC0 %bb.12, implicit undef $scc + + bb.11: + %92:sreg_32_xm0_xexec = S_OR_B32 %1, %0, implicit-def dead $scc + undef %93.sub1:vreg_64 = V_CNDMASK_B32_e64 0, %21.sub3, 0, 0, %92, implicit $exec + %93.sub0:vreg_64 = V_CNDMASK_B32_e64 0, %21.sub2, 0, 0, %92, implicit $exec + S_BRANCH %bb.13 + + bb.12: + %93:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec + + bb.13: + %88.sub0:sreg_64 = COPY %87.sub0 + %94:vgpr_32 = COPY %16.sub0 + %95:vgpr_32 = V_CNDMASK_B32_e64 0, %82.sub0, 0, %94, %2, implicit $exec + %96:vgpr_32 = COPY %16.sub1 + %97:vgpr_32 = V_CNDMASK_B32_e64 0, %82.sub1, 0, %96, %2, implicit $exec + %98:vgpr_32 = V_CNDMASK_B32_e64 0, %97, 0, 2146959360, %18, implicit $exec + dead %99:sreg_32 = S_AND_B32 %32, $exec_lo, implicit-def $scc + %100:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit 
killed $scc + undef %101.sub0:vreg_64 = V_CNDMASK_B32_e64 0, %95, 0, 0, %100, implicit $exec + %101.sub1:vreg_64 = V_CNDMASK_B32_e64 0, %98, 0, 0, %19, implicit $exec + %64:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %64, 0, %101, 0, 0, implicit $mode, implicit $exec + %63:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %63, 0, %21.sub0_sub1, 0, 0, implicit $mode, implicit $exec + %62:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %62, 0, %12.sub2_sub3, 0, 0, implicit $mode, implicit $exec + %33.sub0:sreg_64 = COPY %27.sub0 + %102:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F64_e64 0, %33, 0, %88, 0, implicit $mode, implicit $exec + %103:sreg_32 = nofpexcept V_CMP_EQ_F64_e64 0, 0, 0, %87, 0, implicit $mode, implicit $exec + %104:sreg_32 = S_XOR_B32 %15, %103, implicit-def dead $scc + dead %105:sreg_32 = S_AND_B32 %104, $exec_lo, implicit-def $scc + %106:sgpr_32 = S_CSELECT_B32 0, 2146435072, implicit killed $scc + %107:vgpr_32 = V_CNDMASK_B32_e64 0, %93.sub1, 0, %106, %102, implicit $exec + undef %108.sub1:vreg_64 = V_CNDMASK_B32_e64 0, %107, 0, 0, %23, implicit $exec + %109:sreg_32_xm0_xexec = S_OR_B32 %23, %102, implicit-def dead $scc + %108.sub0:vreg_64 = V_CNDMASK_B32_e64 0, %93.sub0, 0, 0, %109, implicit $exec + %61:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %61, 0, %108, 0, 0, implicit $mode, implicit $exec + %110:vreg_64 = nofpexcept V_FMA_F64_e64 0, %78, 0, %35, 0, %34, 0, 0, implicit $mode, implicit $exec + %111:vreg_64 = nofpexcept V_FMA_F64_e64 0, %110, 0, 0, 0, %36, 0, 0, implicit $mode, implicit $exec + %112:vreg_64 = nofpexcept V_FMA_F64_e64 0, %111, 0, 0, 0, %37, 0, 0, implicit $mode, implicit $exec + %113:vreg_64 = nofpexcept V_FMA_F64_e64 0, %112, 0, 0, 0, %38, 0, 0, implicit $mode, implicit $exec + %114:vreg_64 = nofpexcept V_FMA_F64_e64 0, %113, 0, 0, 0, %39, 0, 0, implicit $mode, implicit $exec + %115:vreg_64 = nofpexcept V_FMA_F64_e64 0, %114, 0, 0, 0, %40, 0, 0, implicit $mode, implicit $exec + %116:vreg_64 = nofpexcept V_FMA_F64_e64 0, 
%115, 0, 0, 0, %41, 0, 0, implicit $mode, implicit $exec + %117:vreg_64 = nofpexcept V_FMA_F64_e64 0, %116, 0, 0, 0, %42, 0, 0, implicit $mode, implicit $exec + %118:vreg_64 = nofpexcept V_ADD_F64_e64 0, %117, 0, %43, 0, 0, implicit $mode, implicit $exec + %44.sub0:sreg_64 = COPY %43.sub0 + %119:vreg_64 = nofpexcept V_ADD_F64_e64 0, %118, 0, %44, 0, 0, implicit $mode, implicit $exec + %120:vreg_64 = nofpexcept V_ADD_F64_e64 0, %24, 0, %119, 0, 0, implicit $mode, implicit $exec + %121:vreg_64 = nofpexcept V_MUL_F64_e64 0, %9, 0, %120, 0, 0, implicit $mode, implicit $exec + %122:vreg_64 = nofpexcept V_MUL_F64_e64 0, %121, 0, %45, 0, 0, implicit $mode, implicit $exec + %46.sub0:sreg_64 = COPY %30.sub0 + %123:vreg_64 = nofpexcept V_FMA_F64_e64 0, %122, 0, %46, 0, %20, 0, 0, implicit $mode, implicit $exec + %124:vreg_64 = nofpexcept V_FMA_F64_e64 0, %123, 0, %48, 0, %47, 0, 0, implicit $mode, implicit $exec + %125:vreg_64 = nofpexcept V_FMA_F64_e64 0, %124, 0, 0, 0, %49, 0, 0, implicit $mode, implicit $exec + %126:vreg_64 = nofpexcept V_FMA_F64_e64 0, %125, 0, 0, 0, %50, 0, 0, implicit $mode, implicit $exec + %127:vreg_64 = nofpexcept V_FMA_F64_e64 0, %126, 0, 0, 0, %51, 0, 0, implicit $mode, implicit $exec + %128:vreg_64 = nofpexcept V_FMA_F64_e64 0, %127, 0, 0, 0, %52, 0, 0, implicit $mode, implicit $exec + %129:vreg_64 = nofpexcept V_FMA_F64_e64 0, %128, 0, 0, 0, %53, 0, 0, implicit $mode, implicit $exec + %130:vreg_64 = nofpexcept V_FMA_F64_e64 0, %129, 0, 0, 0, %54, 0, 0, implicit $mode, implicit $exec + %131:vreg_64 = nofpexcept V_FMA_F64_e64 0, %130, 0, 0, 0, %55, 0, 0, implicit $mode, implicit $exec + %132:vreg_64 = nofpexcept V_FMA_F64_e64 0, %131, 0, 0, 0, %56, 0, 0, implicit $mode, implicit $exec + %60:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %60, 0, %132, 0, 0, implicit $mode, implicit $exec + %133:vreg_64 = nofpexcept V_FMA_F64_e64 0, %80, 0, 0, 0, %36, 0, 0, implicit $mode, implicit $exec + %134:vreg_64 = nofpexcept V_FMA_F64_e64 0, %133, 0, 0, 0, 
%37, 0, 0, implicit $mode, implicit $exec + %135:vreg_64 = nofpexcept V_FMA_F64_e64 0, %134, 0, 0, 0, %38, 0, 0, implicit $mode, implicit $exec + %136:vreg_64 = nofpexcept V_FMA_F64_e64 0, %135, 0, 0, 0, %39, 0, 0, implicit $mode, implicit $exec + %137:vreg_64 = nofpexcept V_FMA_F64_e64 0, %136, 0, 0, 0, %40, 0, 0, implicit $mode, implicit $exec + %138:vreg_64 = nofpexcept V_FMA_F64_e64 0, %137, 0, 0, 0, %41, 0, 0, implicit $mode, implicit $exec + %139:vreg_64 = nofpexcept V_FMA_F64_e64 0, %138, 0, 0, 0, %42, 0, 0, implicit $mode, implicit $exec + %140:vreg_64 = nofpexcept V_MUL_F64_e64 0, %139, 0, %45, 0, 0, implicit $mode, implicit $exec + %57.sub0:sreg_64 = COPY %48.sub0 + %141:vreg_64 = nofpexcept V_FMA_F64_e64 0, %140, 0, %57, 0, %47, 0, 0, implicit $mode, implicit $exec + %142:vreg_64 = nofpexcept V_FMA_F64_e64 0, %141, 0, 0, 0, %49, 0, 0, implicit $mode, implicit $exec + %143:vreg_64 = nofpexcept V_FMA_F64_e64 0, %142, 0, 0, 0, %50, 0, 0, implicit $mode, implicit $exec + %144:vreg_64 = nofpexcept V_FMA_F64_e64 0, %143, 0, 0, 0, %51, 0, 0, implicit $mode, implicit $exec + %145:vreg_64 = nofpexcept V_FMA_F64_e64 0, %144, 0, 0, 0, %52, 0, 0, implicit $mode, implicit $exec + %146:vreg_64 = nofpexcept V_FMA_F64_e64 0, %145, 0, 0, 0, %53, 0, 0, implicit $mode, implicit $exec + %147:vreg_64 = nofpexcept V_FMA_F64_e64 0, %146, 0, 0, 0, %54, 0, 0, implicit $mode, implicit $exec + %148:vreg_64 = nofpexcept V_FMA_F64_e64 0, %147, 0, 0, 0, %55, 0, 0, implicit $mode, implicit $exec + %149:vreg_64 = nofpexcept V_FMA_F64_e64 0, %148, 0, 0, 0, %56, 0, 0, implicit $mode, implicit $exec + %28:vreg_64 = contract nofpexcept V_ADD_F64_e64 0, %28, 0, %149, 0, 0, implicit $mode, implicit $exec + %58:sreg_32 = V_CMP_LE_U32_e64 %8.sub0, %4, implicit $exec + %68:sreg_32 = S_MOV_B32 0 + S_BRANCH %bb.9 + + bb.14: + S_CBRANCH_SCC1 %bb.17, implicit undef $scc + S_BRANCH %bb.15 + + bb.15: + %150:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %74, implicit $exec + $vcc_lo = S_AND_B32 $exec_lo, 
%150, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.17, implicit killed $vcc + S_BRANCH %bb.16 + + bb.16: + %151:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %75, implicit $exec + $vcc_lo = S_AND_B32 $exec_lo, %151, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.16, implicit killed $vcc + S_BRANCH %bb.17 + + bb.17: + %152:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %76, implicit $exec + $vcc_lo = S_AND_B32 $exec_lo, %152, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.14, implicit killed $vcc + S_BRANCH %bb.18 + + bb.18: + $exec_lo = S_OR_B32 $exec_lo, %65, implicit-def $scc + S_ENDPGM 0 + + bb.19: + $exec_lo = S_OR_B32 $exec_lo, %59, implicit-def $scc + S_BRANCH %bb.2 + +...