Index: llvm/lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- llvm/lib/CodeGen/RegAllocGreedy.cpp +++ llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -1244,6 +1244,53 @@ return RCI.getNumAllocatableRegs(ConstrainedRC); } +static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const MachineInstr &MI, Register Reg) { + LaneBitmask Mask; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + unsigned SubReg = MO.getSubReg(); + if (SubReg == 0 && MO.isUse()) + return MRI.getMaxLaneMaskForVReg(Reg); + + LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); + if (MO.isDef()) { + if (!MO.isUndef()) + Mask |= ~SubRegMask; + } else + Mask |= SubRegMask; + } + + return Mask; +} + +/// Return true if \p MI at \p Use reads a subset of the lanes live in \p +/// VirtReg. +static bool readsLaneSubset(const MachineRegisterInfo &MRI, + const MachineInstr *MI, const LiveInterval &VirtReg, + const TargetRegisterInfo *TRI, SlotIndex Use) { + // Early check the common case. + if (MI->isCopy() && + MI->getOperand(0).getSubReg() == MI->getOperand(1).getSubReg()) + return false; + + // FIXME: We're only considering uses, but should we consider defs too? + LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg()); + + LaneBitmask LiveAtMask; + for (const LiveInterval::SubRange &S : VirtReg.subranges()) { + if (S.liveAt(Use)) + LiveAtMask |= S.LaneMask; + } + + // If the live lanes aren't different from the lanes used by the instruction, + // this doesn't help. + return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any(); +} + /// tryInstructionSplit - Split a live range around individual instructions. /// This is normally not worthwhile since the spiller is doing essentially the /// same thing. 
However, when the live range is in a constrained register @@ -1256,8 +1303,13 @@ SmallVectorImpl<Register> &NewVRegs) { const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg()); // There is no point to this if there are no larger sub-classes. - if (!RegClassInfo.isProperSubClass(CurRC)) - return 0; + + bool SplitSubClass = true; + if (!RegClassInfo.isProperSubClass(CurRC)) { + if (!VirtReg.hasSubRanges()) + return 0; + SplitSubClass = false; + } // Always enable split spill mode, since we're effectively spilling to a // register. @@ -1280,14 +1332,19 @@ // Otherwise, splitting just inserts uncoalescable copies that do not help // the allocation. for (const SlotIndex Use : Uses) { - if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) + if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) { if (MI->isFullCopy() || - SuperRCNumAllocatableRegs == - getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC, - TII, TRI, RegClassInfo)) { + (SplitSubClass && + SuperRCNumAllocatableRegs == + getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC, + TII, TRI, RegClassInfo)) || + // TODO: Handle split for subranges with subclass constraints? 
+ (!SplitSubClass && VirtReg.hasSubRanges() && + !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use))) { LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI); continue; } + } SE->openIntv(); SlotIndex SegStart = SE->enterIntvBefore(Use); SlotIndex SegStop = SE->leaveIntvAfter(Use); Index: llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir @@ -0,0 +1,418 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -start-before=greedy,0 -stop-after=virtregrewriter,0 -greedy-regclass-priority-trumps-globalness=1 -o - %s | FileCheck %s + +# The allocation would previously fail due to poor ordering based on +# register class. The super wide tuples should be allocated first so +# that we don't need to try to evict them later. Currently we cannot +# partially evict interfering register tuples. 
+ +--- +name: need_large_tuple_split +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64_xexec, preferred-register: '$vcc' } + - { id: 1, class: sreg_64, preferred-register: '$vcc' } + - { id: 2, class: sreg_64_xexec, preferred-register: '$vcc' } + - { id: 3, class: sreg_64, preferred-register: '$vcc' } + - { id: 4, class: sreg_64, preferred-register: '$vcc' } + - { id: 5, class: sreg_64_xexec, preferred-register: '$vcc' } + - { id: 6, class: sreg_64_xexec, preferred-register: '$vcc' } + - { id: 7, class: sreg_64_xexec, preferred-register: '$vcc' } + - { id: 8, class: sreg_64_xexec, preferred-register: '$vcc' } + - { id: 9, class: sreg_64_xexec, preferred-register: '$vcc' } + - { id: 10, class: sreg_64_xexec, preferred-register: '$vcc' } +frameInfo: + maxAlignment: 1 + hasCalls: true +machineFunctionInfo: + maxKernArgAlign: 1 + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + privateSegmentWaveByteOffset: { reg: '$sgpr17' } + occupancy: 8 +body: | + ; CHECK-LABEL: name: need_large_tuple_split + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr33 = COPY $sgpr14 + ; CHECK-NEXT: renamable $sgpr34_sgpr35 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr18_sgpr19 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr20_sgpr21 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr22_sgpr23 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr52 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr24_sgpr25 = V_CMP_EQ_U32_e64 undef $sgpr4, undef 
%18:vgpr_32, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec + ; CHECK-NEXT: renamable $sgpr100_sgpr101 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr53 = S_MOV_B32 1083786240 + ; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.17(0x40000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr100_sgpr101, implicit-def dead $scc + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_1024_align2 = COPY [[COPY]] + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc + ; CHECK-NEXT: S_BRANCH %bb.17 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable 
$sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr53 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr54 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr55 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr56 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr57 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr58 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr59 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr60 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr61 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr62 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr63 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr64 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr65 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr66 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr67 = COPY killed renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr68 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable 
$sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 + ; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr53 = COPY killed renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr54 = COPY killed renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr55 = COPY killed renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr56 = COPY killed renamable $sgpr72 + ; CHECK-NEXT: renamable 
$sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr76 + ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr58 = COPY killed renamable $sgpr76 + ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr59 = COPY killed renamable $sgpr76 + ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr60 = COPY killed renamable $sgpr76 + ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = 
SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr61 = COPY killed renamable $sgpr80 + ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr62 = COPY killed renamable $sgpr80 + ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr63 = COPY killed renamable $sgpr80 + ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr64 = COPY killed renamable $sgpr80 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr65 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: renamable 
$sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr66 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr67 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr60 = COPY killed renamable $sgpr33 + ; CHECK-NEXT: renamable $sgpr62 = COPY killed renamable $sgpr15 + ; CHECK-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr16, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, CustomRegMask($sgpr60,$sgpr62) + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, 
implicit $sgpr32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.17(0x80000000) + ; CHECK-NEXT: liveins: $sgpr60, $sgpr62 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: $sgpr12 = COPY killed renamable $sgpr60 + ; CHECK-NEXT: $sgpr13 = COPY killed renamable $sgpr62 + ; CHECK-NEXT: $sgpr14 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5) + ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_noregs, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: S_BRANCH %bb.17 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.12(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 renamable $sgpr20_sgpr21, undef renamable $sgpr88_sgpr89, implicit-def dead $scc + ; CHECK-NEXT: renamable $sgpr88_sgpr89 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr4_sgpr5 + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr100_sgpr101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead %27:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr22_sgpr23, implicit $exec + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr100_sgpr101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr90_sgpr91 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: renamable $sgpr92_sgpr93 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: dead %30:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY1]], undef $sgpr33, 11, implicit-def $m0, implicit $m0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.9(0x40000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr90_sgpr91, $sgpr92_sgpr93, $sgpr100_sgpr101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr90_sgpr91, implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.10, implicit $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9: + ; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.17(0x40000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr90_sgpr91, $sgpr92_sgpr93, $sgpr100_sgpr101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable 
$sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY killed renamable $sgpr68_sgpr69, implicit $exec + ; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR undef %18:vgpr_32, [[COPY2]], undef renamable $sgpr4_sgpr5, 0, 0, implicit $exec :: (store (s64), addrspace 1) + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec + ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec + ; CHECK-NEXT: renamable $sgpr64 = S_ADD_U32 renamable $sgpr8, 32, implicit-def dead $scc + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY renamable $sgpr34_sgpr35 + ; CHECK-NEXT: renamable $sgpr52_sgpr53 = COPY killed renamable $sgpr6_sgpr7 + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr52_sgpr53 + ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY killed renamable $sgpr10_sgpr11 + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY renamable $sgpr38_sgpr39 + ; CHECK-NEXT: renamable $sgpr42_sgpr43 = COPY killed renamable $sgpr12_sgpr13 + ; CHECK-NEXT: $sgpr12 = COPY renamable $sgpr33 + ; CHECK-NEXT: $sgpr13 = COPY renamable $sgpr15 + ; CHECK-NEXT: renamable $sgpr36 = COPY killed renamable $sgpr16 + ; CHECK-NEXT: renamable $sgpr37 = COPY killed renamable $sgpr15 + ; CHECK-NEXT: renamable $sgpr40 = COPY killed renamable $sgpr8 + ; CHECK-NEXT: renamable $sgpr44_sgpr45 = COPY killed renamable $sgpr18_sgpr19 + ; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY killed renamable $sgpr20_sgpr21 + ; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY killed renamable $sgpr22_sgpr23 + ; CHECK-NEXT: 
renamable $sgpr50_sgpr51 = COPY killed renamable $sgpr24_sgpr25 + ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr64_sgpr65 + ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9 + ; CHECK-NEXT: renamable $sgpr24_sgpr25 = COPY killed renamable $sgpr50_sgpr51 + ; CHECK-NEXT: renamable $sgpr22_sgpr23 = COPY killed renamable $sgpr48_sgpr49 + ; CHECK-NEXT: renamable $sgpr20_sgpr21 = COPY killed renamable $sgpr46_sgpr47 + ; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr44_sgpr45 + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr42_sgpr43 + ; CHECK-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr40 + ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr38_sgpr39 + ; CHECK-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr37 + ; CHECK-NEXT: renamable $sgpr16 = COPY killed renamable $sgpr36 + ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr52_sgpr53 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ; CHECK-NEXT: $exec = S_MOV_B64_term renamable $sgpr92_sgpr93 + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.10, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.17 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.10: + ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.12(0x40000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, 
$sgpr88_sgpr89, $sgpr90_sgpr91, $sgpr92_sgpr93, $sgpr100_sgpr101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.11: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.17(0x40000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.17 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.12: + ; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.13(0x40000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr100_sgpr101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr88_sgpr89 + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.13: + ; CHECK-NEXT: successors: %bb.15(0x40000000), %bb.14(0x40000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr24_sgpr25, implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.15, implicit $vcc + ; CHECK-NEXT: S_BRANCH %bb.14 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.14: + ; CHECK-NEXT: successors: %bb.15(0x80000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.15: + ; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.16(0x40000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr18_sgpr19, implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.11, implicit $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.16: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.17(0x40000000) + ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.17: + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16 + + %11:sgpr_32 = COPY $sgpr16 + %12:sgpr_32 = COPY $sgpr15 + %13:sgpr_32 = COPY $sgpr14 + %14:sgpr_64 = COPY $sgpr10_sgpr11 + %15:sgpr_64 = COPY $sgpr8_sgpr9 + %16:sgpr_64 = COPY $sgpr6_sgpr7 + %17:sgpr_64 = COPY $sgpr4_sgpr5 + %5:sreg_64_xexec = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec + %6:sreg_64_xexec = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec + %7:sreg_64_xexec = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec + %8:sreg_64_xexec = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec + undef %19.sub16:sgpr_1024 = S_MOV_B32 0 + %9:sreg_64_xexec = V_CMP_EQ_U32_e64 undef %20:sreg_32_xm0_xexec, undef %18:vgpr_32, implicit $exec + %21:vreg_1024_align2 = COPY %19, implicit $exec + %10:sreg_64_xexec = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec + %19.sub17:sgpr_1024 = S_MOV_B32 1083786240 + S_BRANCH %bb.1 + + bb.1: + $vcc = S_AND_B64 $exec, %10, implicit-def dead $scc + %22:vreg_1024_align2 = COPY %21 + S_CBRANCH_VCCNZ %bb.2, implicit $vcc + S_BRANCH %bb.17 + + bb.2: + undef %23.sub0:sgpr_1024 = COPY %19.sub16 + 
%23.sub1:sgpr_1024 = COPY %19.sub16 + %23.sub2:sgpr_1024 = COPY %19.sub16 + %23.sub3:sgpr_1024 = COPY %19.sub16 + %23.sub4:sgpr_1024 = COPY %19.sub16 + %23.sub5:sgpr_1024 = COPY %19.sub16 + %23.sub6:sgpr_1024 = COPY %19.sub16 + %23.sub7:sgpr_1024 = COPY %19.sub16 + %23.sub8:sgpr_1024 = COPY %19.sub16 + %23.sub9:sgpr_1024 = COPY %19.sub16 + %23.sub10:sgpr_1024 = COPY %19.sub16 + %23.sub11:sgpr_1024 = COPY %19.sub16 + %23.sub12:sgpr_1024 = COPY %19.sub16 + %23.sub13:sgpr_1024 = COPY %19.sub16 + %23.sub14:sgpr_1024 = COPY %19.sub16 + %23.sub15:sgpr_1024 = COPY %19.sub16 + %23.sub16:sgpr_1024 = COPY %19.sub16 + %23.sub17:sgpr_1024 = COPY %19.sub16 + %23.sub18:sgpr_1024 = COPY %19.sub16 + %23.sub19:sgpr_1024 = COPY %19.sub16 + %23.sub20:sgpr_1024 = COPY %19.sub16 + %23.sub21:sgpr_1024 = COPY %19.sub16 + %23.sub22:sgpr_1024 = COPY %19.sub16 + %23.sub23:sgpr_1024 = COPY %19.sub16 + %23.sub24:sgpr_1024 = COPY %19.sub16 + %23.sub25:sgpr_1024 = COPY %19.sub16 + %23.sub26:sgpr_1024 = COPY %19.sub16 + %23.sub27:sgpr_1024 = COPY %19.sub16 + %23.sub28:sgpr_1024 = COPY %19.sub16 + %23.sub29:sgpr_1024 = COPY %19.sub16 + %23.sub30:sgpr_1024 = COPY %19.sub16 + %23.sub31:sgpr_1024 = COPY %19.sub16 + %21:vreg_1024_align2 = COPY %23, implicit $exec + S_CBRANCH_EXECZ %bb.11, implicit $exec + S_BRANCH %bb.5 + + bb.3: + ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + dead $sgpr30_sgpr31 = SI_CALL undef %24:sreg_64_xexec, 0, CustomRegMask($sgpr60,$sgpr62) + ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + + bb.4: + ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + $sgpr12 = COPY %13 + $sgpr13 = COPY %12 + $sgpr14 = COPY %11 + dead $sgpr30_sgpr31 = SI_CALL undef %25:sreg_64, 0, csr_amdgpu_noregs, implicit killed $sgpr12, implicit killed $sgpr13, implicit $sgpr14 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + S_BRANCH %bb.17 + + bb.5: + 
%26:sreg_64 = S_AND_B64 %7, undef %3, implicit-def dead $scc + %3:sreg_64 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec + $exec = S_MOV_B64_term %26 + S_CBRANCH_EXECZ %bb.12, implicit $exec + + bb.6: + dead %27:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %8, implicit $exec + + bb.7: + %0:sreg_64_xexec = nofpexcept V_CMP_NLT_F64_e64 0, undef %28:sreg_64, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec + %1:sreg_64 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec + dead %30:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 %22, undef %13, 11, implicit-def $m0, implicit $m0, implicit $exec + + bb.8: + $vcc = S_AND_B64 $exec, %0, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.10, implicit $vcc + + bb.9: + %31:vreg_64_align2 = COPY %19.sub16_sub17, implicit $exec + GLOBAL_STORE_DWORDX2_SADDR undef %18:vgpr_32, %31, undef %24:sreg_64_xexec, 0, 0, implicit $exec :: (store (s64), addrspace 1) + %32:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %5, implicit $exec + dead %33:sreg_64_xexec = V_CMP_NE_U32_e64 1, %32, implicit $exec + undef %34.sub0:sreg_64 = S_ADD_U32 %15.sub0, 32, implicit-def dead $scc + ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + $sgpr4_sgpr5 = COPY %17 + $sgpr6_sgpr7 = COPY %16 + $sgpr10_sgpr11 = COPY %14 + $sgpr12 = COPY %13 + $sgpr13 = COPY %12 + dead $sgpr30_sgpr31 = SI_CALL undef %33, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + $sgpr8_sgpr9 = COPY %34 + dead $sgpr30_sgpr31 = SI_CALL undef %33, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + $exec = S_MOV_B64_term %1 + 
S_CBRANCH_EXECZ %bb.10, implicit $exec + S_BRANCH %bb.17 + + bb.10: + S_CBRANCH_EXECZ %bb.8, implicit $exec + S_BRANCH %bb.12 + + bb.11: + S_CBRANCH_EXECZ %bb.1, implicit $exec + S_BRANCH %bb.17 + + bb.12: + $exec = S_MOV_B64_term %3 + S_CBRANCH_EXECZ %bb.11, implicit $exec + + bb.13: + $vcc = S_AND_B64 $exec, %9, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.15, implicit $vcc + S_BRANCH %bb.14 + + bb.14: + + bb.15: + $vcc = S_AND_B64 $exec, %6, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.11, implicit $vcc + + bb.16: + S_CBRANCH_EXECZ %bb.3, implicit $exec + + bb.17: + +... Index: llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir +++ llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir @@ -31,46 +31,28 @@ ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, 0, implicit $exec :: (load (s128), addrspace 1) ; CHECK-NEXT: } ; CHECK-NEXT: undef %47.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE %47, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef %52.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE %52, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: undef %57.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE %57, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: undef %62.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE %62, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, 
addrspace 5) - ; CHECK-NEXT: undef %67.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE %67, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) - ; CHECK-NEXT: undef %72.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE %72, %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) - ; CHECK-NEXT: undef %77.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE %77, %stack.6, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.6, align 4, addrspace 5) - ; CHECK-NEXT: undef %82.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE %82, %stack.7, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.7, align 4, addrspace 5) - ; CHECK-NEXT: undef %87.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec - ; CHECK-NEXT: undef %91.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec - ; CHECK-NEXT: undef %95.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE %95, %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5) - ; CHECK-NEXT: undef %19.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec - ; CHECK-NEXT: undef %153.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE %153, %stack.14, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.14, align 4, addrspace 5) - ; CHECK-NEXT: undef %102.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec - ; CHECK-NEXT: undef 
%106.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE %106, %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5) - ; CHECK-NEXT: undef %111.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec + ; CHECK-NEXT: undef %54.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec + ; CHECK-NEXT: undef %61.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec + ; CHECK-NEXT: undef %68.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec + ; CHECK-NEXT: undef %75.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec + ; CHECK-NEXT: undef %82.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec + ; CHECK-NEXT: undef %89.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec + ; CHECK-NEXT: undef %94.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec + ; CHECK-NEXT: undef %99.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec + ; CHECK-NEXT: undef %104.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec + ; CHECK-NEXT: undef %139.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec + ; CHECK-NEXT: undef %185.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec + ; CHECK-NEXT: undef %166.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec + ; CHECK-NEXT: undef %113.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec + ; CHECK-NEXT: undef %118.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec + ; 
CHECK-NEXT: undef %123.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, implicit $exec :: (load (s128), align 64, addrspace 1) - ; CHECK-NEXT: undef %115.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec - ; CHECK-NEXT: undef %119.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec - ; CHECK-NEXT: undef %123.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec - ; CHECK-NEXT: undef %127.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE %127, %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5) + ; CHECK-NEXT: undef %128.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec + ; CHECK-NEXT: undef %133.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec + ; CHECK-NEXT: undef %144.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec + ; CHECK-NEXT: undef %149.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, implicit $exec :: (load (s128), addrspace 1) - ; CHECK-NEXT: undef %138.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec - ; CHECK-NEXT: undef %142.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec - ; CHECK-NEXT: undef %146.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec - ; CHECK-NEXT: undef %150.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec - ; 
CHECK-NEXT: SI_SPILL_V128_SAVE %150, %stack.13, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.13, align 4, addrspace 5) ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, implicit $exec :: (load (s128), align 32, addrspace 1) - ; CHECK-NEXT: undef %156.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec ; CHECK-NEXT: undef %36.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec ; CHECK-NEXT: undef %37.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec ; CHECK-NEXT: undef %38.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec @@ -80,73 +62,141 @@ ; CHECK-NEXT: undef %42.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub3, implicit $exec ; CHECK-NEXT: undef %43.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE1]], %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, 
addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE2]], %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE3]], %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE4]], %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE5]], %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, 
implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE6]], %stack.6, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.6, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE7]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE7]], %stack.7, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.7, align 4, addrspace 5) - ; CHECK-NEXT: undef %131.sub2:vreg_128 = COPY %87.sub2 - ; CHECK-NEXT: SI_SPILL_V128_SAVE %131, %stack.11, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.11, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE8]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE8]], %stack.11, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.11, align 4, addrspace 5) - ; CHECK-NEXT: undef %134.sub2:vreg_128 = COPY %91.sub2 - ; CHECK-NEXT: SI_SPILL_V128_SAVE %134, %stack.12, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.12, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.12, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE9]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, 
implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE9]], %stack.12, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.12, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE10]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE10]], %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5) - ; CHECK-NEXT: %19.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.14, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE11]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE11]], %stack.14, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.14, align 4, addrspace 5) - ; CHECK-NEXT: undef %103.sub2:vreg_128 = COPY %102.sub2 - ; CHECK-NEXT: %103.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE12:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE12]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE12]], %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5) - ; CHECK-NEXT: undef %112.sub2:vreg_128 = COPY %111.sub2 - ; CHECK-NEXT: %112.sub0:vreg_128 = V_AND_B32_e32 
[[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec - ; CHECK-NEXT: undef %116.sub2:vreg_128 = COPY %115.sub2 - ; CHECK-NEXT: %116.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec - ; CHECK-NEXT: undef %120.sub2:vreg_128 = COPY %119.sub2 - ; CHECK-NEXT: %120.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec + ; CHECK-NEXT: undef %48.sub2:vreg_128 = COPY %47.sub2 + ; CHECK-NEXT: %48.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec + ; CHECK-NEXT: undef %50.sub0:vreg_128 = COPY %48.sub0 { + ; CHECK-NEXT: internal %50.sub2:vreg_128 = COPY %48.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %50, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: undef %55.sub2:vreg_128 = COPY %54.sub2 + ; CHECK-NEXT: %55.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec + ; CHECK-NEXT: undef %57.sub0:vreg_128 = COPY %55.sub0 { + ; CHECK-NEXT: internal %57.sub2:vreg_128 = COPY %55.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %57, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: undef %62.sub2:vreg_128 = COPY %61.sub2 + ; CHECK-NEXT: %62.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec + ; CHECK-NEXT: undef %64.sub0:vreg_128 = COPY %62.sub0 { + ; CHECK-NEXT: internal %64.sub2:vreg_128 = COPY %62.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %64, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) + ; CHECK-NEXT: undef %69.sub2:vreg_128 = COPY %68.sub2 + ; CHECK-NEXT: %69.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec + ; CHECK-NEXT: undef %71.sub0:vreg_128 = COPY %69.sub0 { + ; CHECK-NEXT: 
internal %71.sub2:vreg_128 = COPY %69.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %71, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) + ; CHECK-NEXT: undef %76.sub2:vreg_128 = COPY %75.sub2 + ; CHECK-NEXT: %76.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec + ; CHECK-NEXT: undef %78.sub0:vreg_128 = COPY %76.sub0 { + ; CHECK-NEXT: internal %78.sub2:vreg_128 = COPY %76.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %78, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) + ; CHECK-NEXT: undef %83.sub2:vreg_128 = COPY %82.sub2 + ; CHECK-NEXT: %83.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec + ; CHECK-NEXT: undef %85.sub0:vreg_128 = COPY %83.sub0 { + ; CHECK-NEXT: internal %85.sub2:vreg_128 = COPY %83.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %85, %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) + ; CHECK-NEXT: undef %90.sub2:vreg_128 = COPY %89.sub2 + ; CHECK-NEXT: %90.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec + ; CHECK-NEXT: undef %140.sub0:vreg_128 = COPY %90.sub0 { + ; CHECK-NEXT: internal %140.sub2:vreg_128 = COPY %90.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %140, %stack.7, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.7, align 4, addrspace 5) + ; CHECK-NEXT: undef %95.sub2:vreg_128 = COPY %94.sub2 + ; CHECK-NEXT: %95.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec + ; CHECK-NEXT: undef %107.sub0:vreg_128 = COPY %95.sub0 { + ; CHECK-NEXT: internal %107.sub2:vreg_128 = COPY %95.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %107, %stack.6, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.6, align 4, addrspace 5) + ; CHECK-NEXT: undef 
%100.sub2:vreg_128 = COPY %99.sub2 + ; CHECK-NEXT: %100.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec + ; CHECK-NEXT: undef %101.sub0:vreg_128 = COPY %100.sub0 { + ; CHECK-NEXT: internal %101.sub2:vreg_128 = COPY %100.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %105.sub2:vreg_128 = COPY %104.sub2 + ; CHECK-NEXT: %105.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec + ; CHECK-NEXT: undef %106.sub0:vreg_128 = COPY %105.sub0 { + ; CHECK-NEXT: internal %106.sub2:vreg_128 = COPY %105.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %139.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec + ; CHECK-NEXT: undef %158.sub0:vreg_128 = COPY %139.sub0 { + ; CHECK-NEXT: internal %158.sub2:vreg_128 = COPY %139.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %158, %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5) + ; CHECK-NEXT: undef %186.sub2:vreg_128 = COPY %185.sub2 + ; CHECK-NEXT: %186.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec + ; CHECK-NEXT: undef %188.sub0:vreg_128 = COPY %186.sub0 { + ; CHECK-NEXT: internal %188.sub2:vreg_128 = COPY %186.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %188, %stack.11, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.11, align 4, addrspace 5) + ; CHECK-NEXT: undef %167.sub2:vreg_128 = COPY %166.sub2 + ; CHECK-NEXT: %167.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec + ; CHECK-NEXT: undef %169.sub0:vreg_128 = COPY %167.sub0 { + ; CHECK-NEXT: internal %169.sub2:vreg_128 = COPY %167.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %169, %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5) + ; CHECK-NEXT: undef %114.sub2:vreg_128 = COPY %113.sub2 + ; CHECK-NEXT: %114.sub0:vreg_128 = 
V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec + ; CHECK-NEXT: undef %115.sub0:vreg_128 = COPY %114.sub0 { + ; CHECK-NEXT: internal %115.sub2:vreg_128 = COPY %114.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %119.sub2:vreg_128 = COPY %118.sub2 + ; CHECK-NEXT: %119.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec + ; CHECK-NEXT: undef %181.sub0:vreg_128 = COPY %119.sub0 { + ; CHECK-NEXT: internal %181.sub2:vreg_128 = COPY %119.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: SI_SPILL_V128_SAVE %181, %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5) ; CHECK-NEXT: undef %124.sub2:vreg_128 = COPY %123.sub2 - ; CHECK-NEXT: %124.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE13:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE13]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE13]], %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5) - ; CHECK-NEXT: undef %139.sub2:vreg_128 = COPY %138.sub2 - ; CHECK-NEXT: %139.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec - ; CHECK-NEXT: undef %143.sub2:vreg_128 = COPY %142.sub2 - ; CHECK-NEXT: %143.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec - ; CHECK-NEXT: undef %147.sub2:vreg_128 = COPY %146.sub2 - ; CHECK-NEXT: %147.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE14:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr32, 0, implicit $exec :: (load (s128) 
from %stack.13, align 4, addrspace 5) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE14]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec - ; CHECK-NEXT: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE14]], %stack.13, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.13, align 4, addrspace 5) - ; CHECK-NEXT: %156.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec + ; CHECK-NEXT: %124.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec + ; CHECK-NEXT: undef %125.sub0:vreg_128 = COPY %124.sub0 { + ; CHECK-NEXT: internal %125.sub2:vreg_128 = COPY %124.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %129.sub2:vreg_128 = COPY %128.sub2 + ; CHECK-NEXT: %129.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec + ; CHECK-NEXT: undef %130.sub0:vreg_128 = COPY %129.sub0 { + ; CHECK-NEXT: internal %130.sub2:vreg_128 = COPY %129.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %134.sub2:vreg_128 = COPY %133.sub2 + ; CHECK-NEXT: %134.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec + ; CHECK-NEXT: undef %135.sub0:vreg_128 = COPY %134.sub0 { + ; CHECK-NEXT: internal %135.sub2:vreg_128 = COPY %134.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %145.sub2:vreg_128 = COPY %144.sub2 + ; CHECK-NEXT: %145.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec + ; CHECK-NEXT: undef %146.sub0:vreg_128 = COPY %145.sub0 { + ; CHECK-NEXT: internal %146.sub2:vreg_128 = COPY %145.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %150.sub2:vreg_128 = COPY %149.sub2 + ; CHECK-NEXT: %150.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec + ; CHECK-NEXT: undef %151.sub0:vreg_128 = COPY %150.sub0 { + ; CHECK-NEXT: internal %151.sub2:vreg_128 = COPY %150.sub2 + ; CHECK-NEXT: } + ; 
CHECK-NEXT: undef %157.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec + ; CHECK-NEXT: undef %155.sub2:vreg_128 = COPY %157.sub2 + ; CHECK-NEXT: %155.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec + ; CHECK-NEXT: undef %156.sub0:vreg_128 = COPY %155.sub0 { + ; CHECK-NEXT: internal %156.sub2:vreg_128 = COPY %155.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %165.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec + ; CHECK-NEXT: undef %163.sub2:vreg_128 = COPY %165.sub2 + ; CHECK-NEXT: %163.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec + ; CHECK-NEXT: undef %164.sub0:vreg_128 = COPY %163.sub0 { + ; CHECK-NEXT: internal %164.sub2:vreg_128 = COPY %163.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %176.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec + ; CHECK-NEXT: undef %174.sub2:vreg_128 = COPY %176.sub2 + ; CHECK-NEXT: %174.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec + ; CHECK-NEXT: undef %175.sub0:vreg_128 = COPY %174.sub0 { + ; CHECK-NEXT: internal %175.sub2:vreg_128 = COPY %174.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: undef %195.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec + ; CHECK-NEXT: undef %180.sub2:vreg_128 = COPY %195.sub2 + ; CHECK-NEXT: %180.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec + ; CHECK-NEXT: undef %194.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec + ; CHECK-NEXT: undef %193.sub2:vreg_128 = COPY %194.sub2 + ; CHECK-NEXT: %193.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec ; CHECK-NEXT: %36.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, 
implicit $exec ; CHECK-NEXT: %37.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec ; CHECK-NEXT: %38.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec @@ -175,164 +225,164 @@ ; CHECK-NEXT: %36.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %36.sub3:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - ; CHECK-NEXT: undef %157.sub0:vreg_128 = COPY %156.sub0 { - ; CHECK-NEXT: internal %157.sub2:vreg_128 = COPY %156.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %157.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %157.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %157, %2, 0, 400, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE15:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.13, align 4, addrspace 5) - ; CHECK-NEXT: undef %149.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE15]].sub0 { - ; CHECK-NEXT: internal %149.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE15]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %149.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %149.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %149, %2, 0, 352, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: undef %145.sub0:vreg_128 = COPY %147.sub0 { - ; CHECK-NEXT: internal %145.sub2:vreg_128 = COPY %147.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %145.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %145.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %145, %2, 0, 368, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %141.sub0:vreg_128 = COPY %143.sub0 { - ; CHECK-NEXT: internal %141.sub2:vreg_128 = COPY %143.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %141.sub1:vreg_128 = COPY %43.sub1 - ; 
CHECK-NEXT: %141.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %141, %2, 0, 320, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - ; CHECK-NEXT: undef %137.sub0:vreg_128 = COPY %139.sub0 { - ; CHECK-NEXT: internal %137.sub2:vreg_128 = COPY %139.sub2 + ; CHECK-NEXT: undef %191.sub0:vreg_128 = COPY %193.sub0 { + ; CHECK-NEXT: internal %191.sub2:vreg_128 = COPY %193.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %137.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %137.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %137, %2, 0, 336, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE16:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5) - ; CHECK-NEXT: undef %126.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub0 { - ; CHECK-NEXT: internal %126.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub2 + ; CHECK-NEXT: %191.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %191.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %191, %2, 0, 400, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %178.sub0:vreg_128 = COPY %180.sub0 { + ; CHECK-NEXT: internal %178.sub2:vreg_128 = COPY %180.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %178.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %178.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %178, %2, 0, 352, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: undef %172.sub0:vreg_128 = COPY %175.sub0 { + ; CHECK-NEXT: internal %172.sub2:vreg_128 = COPY %175.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %172.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %172.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %172, %2, 0, 368, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %161.sub0:vreg_128 = COPY %164.sub0 { 
+ ; CHECK-NEXT: internal %161.sub2:vreg_128 = COPY %164.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %161.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %161.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %161, %2, 0, 320, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) + ; CHECK-NEXT: undef %153.sub0:vreg_128 = COPY %156.sub0 { + ; CHECK-NEXT: internal %153.sub2:vreg_128 = COPY %156.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %153.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %153.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %153, %2, 0, 336, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %148.sub0:vreg_128 = COPY %151.sub0 { + ; CHECK-NEXT: internal %148.sub2:vreg_128 = COPY %151.sub2 ; CHECK-NEXT: } - ; CHECK-NEXT: %126.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %126.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %126, %2, 0, 288, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: undef %122.sub0:vreg_128 = COPY %124.sub0 { - ; CHECK-NEXT: internal %122.sub2:vreg_128 = COPY %124.sub2 + ; CHECK-NEXT: %148.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %148.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %148, %2, 0, 288, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: undef %143.sub0:vreg_128 = COPY %146.sub0 { + ; CHECK-NEXT: internal %143.sub2:vreg_128 = COPY %146.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %143.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %143.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %143, %2, 0, 304, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %132.sub0:vreg_128 = COPY %135.sub0 { + ; CHECK-NEXT: internal %132.sub2:vreg_128 = COPY %135.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %132.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %132.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: 
BUFFER_STORE_DWORDX4_OFFSET %132, %2, 0, 256, 0, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1) + ; CHECK-NEXT: undef %127.sub0:vreg_128 = COPY %130.sub0 { + ; CHECK-NEXT: internal %127.sub2:vreg_128 = COPY %130.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %127.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %127.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %127, %2, 0, 272, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %122.sub0:vreg_128 = COPY %125.sub0 { + ; CHECK-NEXT: internal %122.sub2:vreg_128 = COPY %125.sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %122.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %122.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %122, %2, 0, 304, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %118.sub0:vreg_128 = COPY %120.sub0 { - ; CHECK-NEXT: internal %118.sub2:vreg_128 = COPY %120.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %118.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %118.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %118, %2, 0, 256, 0, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1) - ; CHECK-NEXT: undef %114.sub0:vreg_128 = COPY %116.sub0 { - ; CHECK-NEXT: internal %114.sub2:vreg_128 = COPY %116.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %114.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %114.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %114, %2, 0, 272, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %110.sub0:vreg_128 = COPY %112.sub0 { - ; CHECK-NEXT: internal %110.sub2:vreg_128 = COPY %112.sub2 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %122, %2, 0, 224, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5) + ; CHECK-NEXT: undef 
%117.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub0 { + ; CHECK-NEXT: internal %117.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %117.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %117.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %117, %2, 0, 240, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %112.sub0:vreg_128 = COPY %115.sub0 { + ; CHECK-NEXT: internal %112.sub2:vreg_128 = COPY %115.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %112.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %112.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %112, %2, 0, 192, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5) + ; CHECK-NEXT: undef %110.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub0 { + ; CHECK-NEXT: internal %110.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %110.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %110.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %110, %2, 0, 224, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE17:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5) - ; CHECK-NEXT: undef %105.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub0 { - ; CHECK-NEXT: internal %105.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %105.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %105.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %105, %2, 0, 240, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: undef %101.sub0:vreg_128 = COPY %103.sub0 { - ; CHECK-NEXT: internal 
%101.sub2:vreg_128 = COPY %103.sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %101.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %101.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %101, %2, 0, 192, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE18:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.14, align 4, addrspace 5) - ; CHECK-NEXT: undef %99.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub0 { - ; CHECK-NEXT: internal %99.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %99.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %99.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %99, %2, 0, 208, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: %19.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %19.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %19, %2, 0, 160, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE19:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5) - ; CHECK-NEXT: undef %94.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub0 { - ; CHECK-NEXT: internal %94.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %94.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %94.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %94, %2, 0, 176, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE20:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.12, align 4, addrspace 5) - ; CHECK-NEXT: undef %90.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE20]].sub0 { - ; CHECK-NEXT: internal %90.sub2:vreg_128 = COPY 
[[SI_SPILL_V128_RESTORE20]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %90.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %90.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %90, %2, 0, 128, 0, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE21:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5) - ; CHECK-NEXT: undef %86.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub0 { - ; CHECK-NEXT: internal %86.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %86.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %86.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %86, %2, 0, 144, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE22:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5) - ; CHECK-NEXT: undef %81.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub0 { - ; CHECK-NEXT: internal %81.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub2 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %110, %2, 0, 208, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5) + ; CHECK-NEXT: undef %184.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub0 { + ; CHECK-NEXT: internal %184.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %184.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %184.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %184, %2, 0, 160, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, 
$sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5) + ; CHECK-NEXT: undef %137.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub0 { + ; CHECK-NEXT: internal %137.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %137.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %137.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %137, %2, 0, 176, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: undef %103.sub0:vreg_128 = COPY %106.sub0 { + ; CHECK-NEXT: internal %103.sub2:vreg_128 = COPY %106.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %103.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %103.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %103, %2, 0, 128, 0, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) + ; CHECK-NEXT: undef %98.sub0:vreg_128 = COPY %101.sub0 { + ; CHECK-NEXT: internal %98.sub2:vreg_128 = COPY %101.sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %98.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %98.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %98, %2, 0, 144, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5) + ; CHECK-NEXT: undef %93.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub0 { + ; CHECK-NEXT: internal %93.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %93.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %93.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %93, %2, 0, 96, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5) + ; CHECK-NEXT: undef %88.sub0:vreg_128 = 
COPY [[SI_SPILL_V128_RESTORE5]].sub0 { + ; CHECK-NEXT: internal %88.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %88.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %88.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %88, %2, 0, 112, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) + ; CHECK-NEXT: undef %81.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub0 { + ; CHECK-NEXT: internal %81.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %81.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %81.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %81, %2, 0, 96, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE23:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5) - ; CHECK-NEXT: undef %76.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub0 { - ; CHECK-NEXT: internal %76.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %76.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %76.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %76, %2, 0, 112, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE24:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) - ; CHECK-NEXT: undef %71.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub0 { - ; CHECK-NEXT: internal %71.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %71.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %71.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: 
BUFFER_STORE_DWORDX4_OFFSET %71, %2, 0, 64, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE25:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) - ; CHECK-NEXT: undef %66.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub0 { - ; CHECK-NEXT: internal %66.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %66.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %66.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %66, %2, 0, 80, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE26:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) - ; CHECK-NEXT: undef %61.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub0 { - ; CHECK-NEXT: internal %61.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %61.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %61.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %61, %2, 0, 32, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE27:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: undef %56.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub0 { - ; CHECK-NEXT: internal %56.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %56.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %56.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %56, %2, 0, 48, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE28:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, 
addrspace 5) - ; CHECK-NEXT: undef %51.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub0 { - ; CHECK-NEXT: internal %51.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub2 - ; CHECK-NEXT: } - ; CHECK-NEXT: %51.sub1:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: %51.sub3:vreg_128 = COPY %43.sub1 - ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %51, %2, 0, 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1) - ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE29:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef %46.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE29]].sub0 { - ; CHECK-NEXT: internal %46.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE29]].sub2 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %81, %2, 0, 64, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) + ; CHECK-NEXT: undef %74.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub0 { + ; CHECK-NEXT: internal %74.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %74.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %74.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %74, %2, 0, 80, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) + ; CHECK-NEXT: undef %67.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub0 { + ; CHECK-NEXT: internal %67.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %67.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %67.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %67, %2, 0, 32, 0, 0, 0, implicit $exec :: 
(store (s128), align 32, addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) + ; CHECK-NEXT: undef %60.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub0 { + ; CHECK-NEXT: internal %60.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %60.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %60.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %60, %2, 0, 48, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: undef %53.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub0 { + ; CHECK-NEXT: internal %53.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub2 + ; CHECK-NEXT: } + ; CHECK-NEXT: %53.sub1:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: %53.sub3:vreg_128 = COPY %43.sub1 + ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %53, %2, 0, 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1) + ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: undef %46.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub0 { + ; CHECK-NEXT: internal %46.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub2 ; CHECK-NEXT: } ; CHECK-NEXT: %46.sub1:vreg_128 = COPY %43.sub1 ; CHECK-NEXT: %46.sub3:vreg_128 = COPY %43.sub1 Index: llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll @@ -0,0 +1,1881 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a 
-greedy-regclass-priority-trumps-globalness=1 -o - %s | FileCheck -check-prefixes=GFX90A,GLOBALNESS1 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -greedy-regclass-priority-trumps-globalness=0 -o - %s | FileCheck -check-prefixes=GFX90A,GLOBALNESS0 %s + +declare void @wobble() + +define internal fastcc void @widget() { +; GFX90A-LABEL: widget: +; GFX90A: ; %bb.0: ; %bb +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_or_saveexec_b64 s[16:17], -1 +; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX90A-NEXT: s_mov_b64 exec, s[16:17] +; GFX90A-NEXT: v_writelane_b32 v40, s33, 2 +; GFX90A-NEXT: s_mov_b32 s33, s32 +; GFX90A-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-NEXT: s_getpc_b64 s[16:17] +; GFX90A-NEXT: s_add_u32 s16, s16, wobble@gotpcrel32@lo+4 +; GFX90A-NEXT: s_addc_u32 s17, s17, wobble@gotpcrel32@hi+12 +; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 +; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17] +bb: + tail call void @wobble() + unreachable +} + +define amdgpu_kernel void @kernel(i32 addrspace(1)* %arg1.global, i1 %tmp3.i.i, i32 %tmp5.i.i, i32 %tmp427.i, i1 %tmp438.i, double %tmp27.i, i1 %tmp48.i) { +; GLOBALNESS1-LABEL: kernel: +; GLOBALNESS1: ; %bb.0: ; %bb +; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[8:9] +; GLOBALNESS1-NEXT: s_mov_b64 s[36:37], s[6:7] +; GLOBALNESS1-NEXT: s_mov_b64 s[40:41], s[4:5] +; GLOBALNESS1-NEXT: v_mov_b32_e32 v42, v0 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v44, 0 +; GLOBALNESS1-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 +; GLOBALNESS1-NEXT: s_load_dwordx2 s[56:57], s[8:9], 0x8 +; GLOBALNESS1-NEXT: s_nop 0 +; GLOBALNESS1-NEXT: s_load_dword s8, s[8:9], 0x14 +; GLOBALNESS1-NEXT: s_nop 0 +; GLOBALNESS1-NEXT: s_load_dwordx2 s[6:7], s[38:39], 0x18 +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 +; GLOBALNESS1-NEXT: global_store_dword v[0:1], v44, off +; 
GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) +; GLOBALNESS1-NEXT: global_load_dword v0, v44, s[4:5] +; GLOBALNESS1-NEXT: s_mov_b32 s61, 0 +; GLOBALNESS1-NEXT: s_mov_b32 s60, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s62, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s63, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s64, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s65, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s66, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s67, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s68, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s69, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s70, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s71, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s72, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s73, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s74, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s75, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s76, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s77, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s78, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s79, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s80, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s81, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s82, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s83, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s84, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s85, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s86, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s87, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s88, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s89, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s90, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s91, s61 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a32, s60 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a33, s61 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a34, s62 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a35, s63 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a36, s64 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a37, s65 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a38, s66 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a39, s67 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a40, s68 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a41, s69 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a42, s70 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a43, s71 +; GLOBALNESS1-NEXT: 
v_accvgpr_write_b32 a44, s72 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a45, s73 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a46, s74 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a47, s75 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a48, s76 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a49, s77 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a50, s78 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a51, s79 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a52, s80 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a53, s81 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a54, s82 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a55, s83 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a56, s84 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a57, s85 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a58, s86 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a59, s87 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a60, s88 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a61, s89 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a62, s90 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a63, s91 +; GLOBALNESS1-NEXT: s_movk_i32 s60, 0x80 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s60, 0 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s61, 1 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s62, 2 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s63, 3 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s64, 4 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s65, 5 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s66, 6 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s67, 7 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s68, 8 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s69, 9 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s70, 10 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s71, 11 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s72, 12 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s73, 13 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s74, 14 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s75, 15 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s76, 16 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s77, 17 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s78, 18 +; GLOBALNESS1-NEXT: v_writelane_b32 
v41, s79, 19 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s80, 20 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s81, 21 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s82, 22 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s83, 23 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s84, 24 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s85, 25 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s86, 26 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s87, 27 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s88, 28 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s89, 29 +; GLOBALNESS1-NEXT: s_add_u32 flat_scratch_lo, s12, s17 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, 0x40994400 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s90, 30 +; GLOBALNESS1-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s91, 31 +; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[6:7], v[44:45] +; GLOBALNESS1-NEXT: s_add_u32 s0, s0, s17 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s4, 32 +; GLOBALNESS1-NEXT: s_addc_u32 s1, s1, 0 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s5, 33 +; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[6:7], 0 +; GLOBALNESS1-NEXT: s_bitcmp1_b32 s56, 0 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s4, 34 +; GLOBALNESS1-NEXT: s_load_dword s9, s[38:39], 0x20 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s5, 35 +; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GLOBALNESS1-NEXT: s_xor_b64 s[46:47], s[4:5], -1 +; GLOBALNESS1-NEXT: s_bitcmp1_b32 s8, 0 +; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GLOBALNESS1-NEXT: s_xor_b64 s[50:51], s[4:5], -1 +; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) +; GLOBALNESS1-NEXT: s_bitcmp1_b32 s9, 0 +; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GLOBALNESS1-NEXT: s_getpc_b64 s[6:7] +; GLOBALNESS1-NEXT: s_add_u32 s6, s6, wobble@gotpcrel32@lo+4 +; GLOBALNESS1-NEXT: s_addc_u32 s7, s7, wobble@gotpcrel32@hi+12 +; GLOBALNESS1-NEXT: s_xor_b64 s[52:53], s[4:5], -1 +; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) +; GLOBALNESS1-NEXT: 
v_cmp_gt_i32_e64 s[4:5], 0, v0 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s4, 36 +; GLOBALNESS1-NEXT: s_load_dwordx2 s[42:43], s[6:7], 0x0 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s5, 37 +; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s4, 38 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s5, 39 +; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s4, 40 +; GLOBALNESS1-NEXT: s_mov_b32 s100, s16 +; GLOBALNESS1-NEXT: s_mov_b32 s101, s15 +; GLOBALNESS1-NEXT: s_mov_b32 s44, s14 +; GLOBALNESS1-NEXT: s_mov_b64 s[34:35], s[10:11] +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[58:59], 1, v1 +; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[48:49], 1, v0 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s5, 41 +; GLOBALNESS1-NEXT: s_mov_b32 s45, 0x3ff00000 +; GLOBALNESS1-NEXT: s_mov_b32 s32, 0 +; GLOBALNESS1-NEXT: s_branch .LBB1_4 +; GLOBALNESS1-NEXT: .LBB1_1: ; %bb70.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: v_readlane_b32 s6, v41, 40 +; GLOBALNESS1-NEXT: v_readlane_b32 s7, v41, 41 +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_29 +; GLOBALNESS1-NEXT: .LBB1_2: ; %Flow6 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5] +; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], 0 +; GLOBALNESS1-NEXT: .LBB1_3: ; %Flow19 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a63, v31 +; GLOBALNESS1-NEXT: v_readlane_b32 s4, v41, 42 +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7] +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a62, v30 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a61, v29 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a60, v28 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a59, v27 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a58, v26 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a57, v25 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a56, v24 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a55, v23 +; 
GLOBALNESS1-NEXT: v_accvgpr_write_b32 a54, v22 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a53, v21 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a52, v20 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a51, v19 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a50, v18 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a49, v17 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a48, v16 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a47, v15 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a46, v14 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a45, v13 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a44, v12 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a43, v11 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a42, v10 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a41, v9 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a40, v8 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a39, v7 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a38, v6 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a37, v5 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a36, v4 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a35, v3 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a34, v2 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a33, v1 +; GLOBALNESS1-NEXT: v_accvgpr_write_b32 a32, v0 +; GLOBALNESS1-NEXT: v_readlane_b32 s5, v41, 43 +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_30 +; GLOBALNESS1-NEXT: .LBB1_4: ; %bb5 +; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1 +; GLOBALNESS1-NEXT: ; Child Loop BB1_17 Depth 2 +; GLOBALNESS1-NEXT: v_readlane_b32 s60, v41, 0 +; GLOBALNESS1-NEXT: v_readlane_b32 s61, v41, 1 +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[60:61], s[60:61] op_sel:[0,1] +; GLOBALNESS1-NEXT: flat_load_dword v40, v[0:1] +; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS1-NEXT: buffer_store_dword v44, off, s[0:3], 0 +; GLOBALNESS1-NEXT: flat_load_dword v43, v[0:1] +; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] +; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] +; GLOBALNESS1-NEXT: s_mov_b32 s12, s44 +; GLOBALNESS1-NEXT: s_mov_b32 
s13, s101 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s100 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v42 +; GLOBALNESS1-NEXT: v_readlane_b32 s62, v41, 2 +; GLOBALNESS1-NEXT: v_readlane_b32 s63, v41, 3 +; GLOBALNESS1-NEXT: v_readlane_b32 s64, v41, 4 +; GLOBALNESS1-NEXT: v_readlane_b32 s65, v41, 5 +; GLOBALNESS1-NEXT: v_readlane_b32 s66, v41, 6 +; GLOBALNESS1-NEXT: v_readlane_b32 s67, v41, 7 +; GLOBALNESS1-NEXT: v_readlane_b32 s68, v41, 8 +; GLOBALNESS1-NEXT: v_readlane_b32 s69, v41, 9 +; GLOBALNESS1-NEXT: v_readlane_b32 s70, v41, 10 +; GLOBALNESS1-NEXT: v_readlane_b32 s71, v41, 11 +; GLOBALNESS1-NEXT: v_readlane_b32 s72, v41, 12 +; GLOBALNESS1-NEXT: v_readlane_b32 s73, v41, 13 +; GLOBALNESS1-NEXT: v_readlane_b32 s74, v41, 14 +; GLOBALNESS1-NEXT: v_readlane_b32 s75, v41, 15 +; GLOBALNESS1-NEXT: v_readlane_b32 s76, v41, 16 +; GLOBALNESS1-NEXT: v_readlane_b32 s77, v41, 17 +; GLOBALNESS1-NEXT: v_readlane_b32 s78, v41, 18 +; GLOBALNESS1-NEXT: v_readlane_b32 s79, v41, 19 +; GLOBALNESS1-NEXT: v_readlane_b32 s80, v41, 20 +; GLOBALNESS1-NEXT: v_readlane_b32 s81, v41, 21 +; GLOBALNESS1-NEXT: v_readlane_b32 s82, v41, 22 +; GLOBALNESS1-NEXT: v_readlane_b32 s83, v41, 23 +; GLOBALNESS1-NEXT: v_readlane_b32 s84, v41, 24 +; GLOBALNESS1-NEXT: v_readlane_b32 s85, v41, 25 +; GLOBALNESS1-NEXT: v_readlane_b32 s86, v41, 26 +; GLOBALNESS1-NEXT: v_readlane_b32 s87, v41, 27 +; GLOBALNESS1-NEXT: v_readlane_b32 s88, v41, 28 +; GLOBALNESS1-NEXT: v_readlane_b32 s89, v41, 29 +; GLOBALNESS1-NEXT: v_readlane_b32 s90, v41, 30 +; GLOBALNESS1-NEXT: v_readlane_b32 s91, v41, 31 +; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[42:43] +; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5 +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[58:59] +; GLOBALNESS1-NEXT: ; kill: killed $sgpr4_sgpr5 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1 +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_10 +; GLOBALNESS1-NEXT: ; %bb.5: ; %NodeBlock +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; 
GLOBALNESS1-NEXT: s_mov_b64 s[8:9], -1 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0 +; GLOBALNESS1-NEXT: s_cmp_lt_i32 s57, 1 +; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1 +; GLOBALNESS1-NEXT: s_cbranch_scc1 .LBB1_7 +; GLOBALNESS1-NEXT: ; %bb.6: ; %LeafBlock3 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: s_cmp_lg_u32 s57, 1 +; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], 0 +; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GLOBALNESS1-NEXT: .LBB1_7: ; %Flow17 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_9 +; GLOBALNESS1-NEXT: ; %bb.8: ; %LeafBlock +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: s_cmp_lg_u32 s57, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], 0 +; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GLOBALNESS1-NEXT: .LBB1_9: ; %Flow18 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s8, 42 +; GLOBALNESS1-NEXT: v_writelane_b32 v41, s9, 43 +; GLOBALNESS1-NEXT: .LBB1_10: ; %Flow16 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: v_readlane_b32 s68, v41, 0 +; GLOBALNESS1-NEXT: v_readlane_b32 s69, v41, 1 +; GLOBALNESS1-NEXT: s_mov_b64 s[60:61], s[68:69] +; GLOBALNESS1-NEXT: v_readlane_b32 s70, v41, 2 +; GLOBALNESS1-NEXT: v_readlane_b32 s71, v41, 3 +; GLOBALNESS1-NEXT: v_readlane_b32 s72, v41, 4 +; GLOBALNESS1-NEXT: v_readlane_b32 s73, v41, 5 +; GLOBALNESS1-NEXT: v_readlane_b32 s74, v41, 6 +; GLOBALNESS1-NEXT: v_readlane_b32 s75, v41, 7 +; GLOBALNESS1-NEXT: v_readlane_b32 s76, v41, 8 +; GLOBALNESS1-NEXT: v_readlane_b32 s77, v41, 9 +; GLOBALNESS1-NEXT: v_readlane_b32 s78, v41, 10 +; GLOBALNESS1-NEXT: v_readlane_b32 s79, v41, 11 +; GLOBALNESS1-NEXT: v_readlane_b32 s80, v41, 12 +; GLOBALNESS1-NEXT: v_readlane_b32 s81, v41, 13 +; GLOBALNESS1-NEXT: v_readlane_b32 s82, v41, 14 +; GLOBALNESS1-NEXT: v_readlane_b32 s83, v41, 15 +; GLOBALNESS1-NEXT: v_readlane_b32 
s84, v41, 16 +; GLOBALNESS1-NEXT: v_readlane_b32 s85, v41, 17 +; GLOBALNESS1-NEXT: v_readlane_b32 s86, v41, 18 +; GLOBALNESS1-NEXT: v_readlane_b32 s87, v41, 19 +; GLOBALNESS1-NEXT: v_readlane_b32 s88, v41, 20 +; GLOBALNESS1-NEXT: v_readlane_b32 s89, v41, 21 +; GLOBALNESS1-NEXT: v_readlane_b32 s90, v41, 22 +; GLOBALNESS1-NEXT: v_readlane_b32 s91, v41, 23 +; GLOBALNESS1-NEXT: v_readlane_b32 s92, v41, 24 +; GLOBALNESS1-NEXT: v_readlane_b32 s93, v41, 25 +; GLOBALNESS1-NEXT: v_readlane_b32 s94, v41, 26 +; GLOBALNESS1-NEXT: v_readlane_b32 s95, v41, 27 +; GLOBALNESS1-NEXT: v_readlane_b32 s96, v41, 28 +; GLOBALNESS1-NEXT: v_readlane_b32 s97, v41, 29 +; GLOBALNESS1-NEXT: v_readlane_b32 s98, v41, 30 +; GLOBALNESS1-NEXT: v_readlane_b32 s99, v41, 31 +; GLOBALNESS1-NEXT: s_mov_b32 s68, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s69, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s70, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s71, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s72, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s73, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s74, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s75, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s76, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s77, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s78, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s79, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s80, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s81, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s82, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s83, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s84, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s85, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s86, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s87, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s88, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s89, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s90, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s91, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s92, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s93, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s94, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s95, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s96, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s97, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s98, s61 +; 
GLOBALNESS1-NEXT: s_mov_b32 s99, s61 +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1] +; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1 +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[4:5] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[8:9], s[76:77], s[76:77] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[10:11], s[78:79], s[78:79] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[12:13], s[80:81], s[80:81] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[14:15], s[82:83], s[82:83] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[16:17], s[84:85], s[84:85] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[18:19], s[86:87], s[86:87] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[20:21], s[88:89], s[88:89] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[22:23], s[90:91], s[90:91] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[24:25], s[92:93], s[92:93] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[26:27], s[94:95], s[94:95] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[28:29], s[96:97], s[96:97] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1] +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_3 +; GLOBALNESS1-NEXT: ; %bb.11: ; %baz.exit.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 +; GLOBALNESS1-NEXT: flat_load_dword v0, v[0:1] +; GLOBALNESS1-NEXT: v_readlane_b32 s60, v41, 0 +; GLOBALNESS1-NEXT: v_readlane_b32 s61, v41, 1 +; GLOBALNESS1-NEXT: v_readlane_b32 s64, v41, 4 +; GLOBALNESS1-NEXT: v_readlane_b32 s65, v41, 5 +; GLOBALNESS1-NEXT: v_readlane_b32 s66, v41, 6 +; GLOBALNESS1-NEXT: v_readlane_b32 s67, v41, 7 +; GLOBALNESS1-NEXT: v_readlane_b32 s68, v41, 8 +; GLOBALNESS1-NEXT: v_readlane_b32 s69, v41, 9 +; GLOBALNESS1-NEXT: v_readlane_b32 s70, 
v41, 10 +; GLOBALNESS1-NEXT: v_readlane_b32 s71, v41, 11 +; GLOBALNESS1-NEXT: v_readlane_b32 s72, v41, 12 +; GLOBALNESS1-NEXT: v_readlane_b32 s73, v41, 13 +; GLOBALNESS1-NEXT: v_readlane_b32 s74, v41, 14 +; GLOBALNESS1-NEXT: v_readlane_b32 s75, v41, 15 +; GLOBALNESS1-NEXT: v_readlane_b32 s76, v41, 16 +; GLOBALNESS1-NEXT: v_readlane_b32 s77, v41, 17 +; GLOBALNESS1-NEXT: v_readlane_b32 s78, v41, 18 +; GLOBALNESS1-NEXT: v_readlane_b32 s79, v41, 19 +; GLOBALNESS1-NEXT: v_readlane_b32 s80, v41, 20 +; GLOBALNESS1-NEXT: v_readlane_b32 s81, v41, 21 +; GLOBALNESS1-NEXT: s_mov_b32 s65, s45 +; GLOBALNESS1-NEXT: s_mov_b32 s64, s61 +; GLOBALNESS1-NEXT: v_readlane_b32 s82, v41, 22 +; GLOBALNESS1-NEXT: v_readlane_b32 s83, v41, 23 +; GLOBALNESS1-NEXT: v_readlane_b32 s84, v41, 24 +; GLOBALNESS1-NEXT: v_readlane_b32 s85, v41, 25 +; GLOBALNESS1-NEXT: v_readlane_b32 s86, v41, 26 +; GLOBALNESS1-NEXT: v_readlane_b32 s87, v41, 27 +; GLOBALNESS1-NEXT: v_readlane_b32 s88, v41, 28 +; GLOBALNESS1-NEXT: v_readlane_b32 s89, v41, 29 +; GLOBALNESS1-NEXT: v_readlane_b32 s90, v41, 30 +; GLOBALNESS1-NEXT: v_readlane_b32 s91, v41, 31 +; GLOBALNESS1-NEXT: s_mov_b32 s66, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s67, s45 +; GLOBALNESS1-NEXT: s_mov_b32 s68, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s69, s45 +; GLOBALNESS1-NEXT: s_mov_b32 s70, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s71, s45 +; GLOBALNESS1-NEXT: s_mov_b32 s72, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s73, s45 +; GLOBALNESS1-NEXT: s_mov_b32 s74, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s75, s45 +; GLOBALNESS1-NEXT: s_mov_b32 s76, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s77, s45 +; GLOBALNESS1-NEXT: s_mov_b32 s78, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s79, s45 +; GLOBALNESS1-NEXT: s_mov_b32 s80, s61 +; GLOBALNESS1-NEXT: s_mov_b32 s81, s45 +; GLOBALNESS1-NEXT: v_readlane_b32 s62, v41, 2 +; GLOBALNESS1-NEXT: v_readlane_b32 s63, v41, 3 +; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[54:55], 0, v0 +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 
s[64:65], s[64:65] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], s[66:67], s[66:67] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[4:5], s[68:69], s[68:69] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[6:7], s[70:71], s[70:71] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[8:9], s[72:73], s[72:73] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[10:11], s[74:75], s[74:75] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[12:13], s[76:77], s[76:77] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[14:15], s[78:79], s[78:79] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[16:17], s[80:81], s[80:81] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[18:19], s[82:83], s[82:83] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[20:21], s[84:85], s[84:85] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[22:23], s[86:87], s[86:87] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[24:25], s[88:89], s[88:89] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[26:27], s[90:91], s[90:91] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[28:29], s[92:93], s[92:93] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[30:31], s[94:95], s[94:95] op_sel:[0,1] +; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[90:91], s[54:55] +; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26 +; GLOBALNESS1-NEXT: ; %bb.12: ; %bb33.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 +; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[2:3], off +; GLOBALNESS1-NEXT: v_readlane_b32 s4, v41, 36 +; GLOBALNESS1-NEXT: v_readlane_b32 s5, v41, 37 +; GLOBALNESS1-NEXT: s_mov_b64 s[92:93], s[58:59] +; GLOBALNESS1-NEXT: s_mov_b32 s89, s57 +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_14 +; GLOBALNESS1-NEXT: ; %bb.13: ; %bb39.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[44:45], off +; GLOBALNESS1-NEXT: .LBB1_14: ; 
%bb44.lr.ph.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v43 +; GLOBALNESS1-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc +; GLOBALNESS1-NEXT: v_readlane_b32 s62, v41, 32 +; GLOBALNESS1-NEXT: v_readlane_b32 s64, v41, 34 +; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) +; GLOBALNESS1-NEXT: v_cmp_nlt_f64_e64 s[56:57], 0, v[0:1] +; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[58:59], 0, v2 +; GLOBALNESS1-NEXT: v_readlane_b32 s63, v41, 33 +; GLOBALNESS1-NEXT: v_readlane_b32 s65, v41, 35 +; GLOBALNESS1-NEXT: s_branch .LBB1_17 +; GLOBALNESS1-NEXT: .LBB1_15: ; %Flow7 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5] +; GLOBALNESS1-NEXT: .LBB1_16: ; %bb63.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[52:53] +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_25 +; GLOBALNESS1-NEXT: .LBB1_17: ; %bb44.i +; GLOBALNESS1-NEXT: ; Parent Loop BB1_4 Depth=1 +; GLOBALNESS1-NEXT: ; => This Inner Loop Header: Depth=2 +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[46:47] +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_16 +; GLOBALNESS1-NEXT: ; %bb.18: ; %bb46.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[50:51] +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_16 +; GLOBALNESS1-NEXT: ; %bb.19: ; %bb50.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[62:63] +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_22 +; GLOBALNESS1-NEXT: ; %bb.20: ; %bb3.i.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[64:65] +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_22 +; GLOBALNESS1-NEXT: ; %bb.21: ; %bb6.i.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[56:57] +; GLOBALNESS1-NEXT: .LBB1_22: ; %spam.exit.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS1-NEXT: 
s_andn2_b64 vcc, exec, s[48:49] +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_16 +; GLOBALNESS1-NEXT: ; %bb.23: ; %bb55.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS1-NEXT: s_add_u32 s60, s38, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s61, s39, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] +; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[60:61] +; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] +; GLOBALNESS1-NEXT: s_mov_b32 s12, s44 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s101 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s100 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v42 +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[42:43] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] +; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[60:61] +; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] +; GLOBALNESS1-NEXT: s_mov_b32 s12, s44 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s101 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s100 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v42 +; GLOBALNESS1-NEXT: global_store_dwordx2 v[0:1], a[32:33], off +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[42:43] +; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[58:59] +; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_15 +; GLOBALNESS1-NEXT: ; %bb.24: ; %bb62.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 +; GLOBALNESS1-NEXT: global_store_dwordx2 v[0:1], v[44:45], off +; GLOBALNESS1-NEXT: s_branch .LBB1_15 +; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow14 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: v_readlane_b32 s56, v41, 0 +; GLOBALNESS1-NEXT: v_readlane_b32 s57, v41, 1 +; GLOBALNESS1-NEXT: v_readlane_b32 s68, v41, 12 +; GLOBALNESS1-NEXT: v_readlane_b32 s69, v41, 13 +; GLOBALNESS1-NEXT: v_readlane_b32 s70, v41, 14 +; GLOBALNESS1-NEXT: v_readlane_b32 s71, v41, 15 +; GLOBALNESS1-NEXT: 
v_readlane_b32 s72, v41, 16 +; GLOBALNESS1-NEXT: v_readlane_b32 s73, v41, 17 +; GLOBALNESS1-NEXT: v_readlane_b32 s74, v41, 18 +; GLOBALNESS1-NEXT: v_readlane_b32 s75, v41, 19 +; GLOBALNESS1-NEXT: v_readlane_b32 s76, v41, 20 +; GLOBALNESS1-NEXT: v_readlane_b32 s77, v41, 21 +; GLOBALNESS1-NEXT: v_readlane_b32 s78, v41, 22 +; GLOBALNESS1-NEXT: v_readlane_b32 s79, v41, 23 +; GLOBALNESS1-NEXT: v_readlane_b32 s80, v41, 24 +; GLOBALNESS1-NEXT: v_readlane_b32 s81, v41, 25 +; GLOBALNESS1-NEXT: v_readlane_b32 s82, v41, 26 +; GLOBALNESS1-NEXT: v_readlane_b32 s83, v41, 27 +; GLOBALNESS1-NEXT: v_readlane_b32 s84, v41, 28 +; GLOBALNESS1-NEXT: v_readlane_b32 s85, v41, 29 +; GLOBALNESS1-NEXT: s_mov_b32 s68, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s69, s57 +; GLOBALNESS1-NEXT: v_readlane_b32 s58, v41, 2 +; GLOBALNESS1-NEXT: v_readlane_b32 s59, v41, 3 +; GLOBALNESS1-NEXT: v_readlane_b32 s86, v41, 30 +; GLOBALNESS1-NEXT: v_readlane_b32 s87, v41, 31 +; GLOBALNESS1-NEXT: s_mov_b32 s70, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s71, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s72, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s73, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s74, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s75, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s76, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s77, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s78, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s79, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s80, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s81, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s82, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s83, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s84, s57 +; GLOBALNESS1-NEXT: s_mov_b32 s85, s57 +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_readlane_b32 s60, v41, 4 +; GLOBALNESS1-NEXT: v_readlane_b32 s61, v41, 5 +; GLOBALNESS1-NEXT: v_readlane_b32 s62, v41, 6 +; GLOBALNESS1-NEXT: v_readlane_b32 s63, v41, 7 +; GLOBALNESS1-NEXT: v_readlane_b32 s64, v41, 8 +; GLOBALNESS1-NEXT: v_readlane_b32 s65, v41, 9 +; GLOBALNESS1-NEXT: v_readlane_b32 s66, v41, 10 +; 
GLOBALNESS1-NEXT: v_readlane_b32 s67, v41, 11 +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[8:9], s[76:77], s[76:77] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[10:11], s[78:79], s[78:79] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[12:13], s[80:81], s[80:81] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[14:15], s[82:83], s[82:83] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[16:17], s[84:85], s[84:85] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[18:19], s[86:87], s[86:87] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[20:21], s[88:89], s[88:89] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[22:23], s[90:91], s[90:91] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[24:25], s[92:93], s[92:93] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[26:27], s[94:95], s[94:95] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[28:29], s[96:97], s[96:97] op_sel:[0,1] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1] +; GLOBALNESS1-NEXT: s_mov_b32 s57, s89 +; GLOBALNESS1-NEXT: s_mov_b64 s[58:59], s[92:93] +; GLOBALNESS1-NEXT: .LBB1_26: ; %Flow15 +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[90:91] +; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[54:55] +; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_2 +; GLOBALNESS1-NEXT: ; %bb.27: ; %bb67.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: v_readlane_b32 s6, v41, 38 +; GLOBALNESS1-NEXT: v_readlane_b32 s7, v41, 39 +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_1 +; GLOBALNESS1-NEXT: ; %bb.28: ; %bb69.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[32:33], 0, 0 +; 
GLOBALNESS1-NEXT: global_store_dwordx2 v[32:33], v[44:45], off +; GLOBALNESS1-NEXT: s_branch .LBB1_1 +; GLOBALNESS1-NEXT: .LBB1_29: ; %bb73.i +; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[32:33], 0, 0 +; GLOBALNESS1-NEXT: global_store_dwordx2 v[32:33], v[44:45], off +; GLOBALNESS1-NEXT: s_branch .LBB1_2 +; GLOBALNESS1-NEXT: .LBB1_30: ; %loop.exit.guard +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1 +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_32 +; GLOBALNESS1-NEXT: ; %bb.31: ; %bb7.i.i +; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] +; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] +; GLOBALNESS1-NEXT: s_mov_b32 s12, s44 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s101 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s100 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v42 +; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17] +; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 +; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0 +; GLOBALNESS1-NEXT: .LBB1_32: ; %Flow +; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_34 +; GLOBALNESS1-NEXT: ; %bb.33: ; %bb11.i.i +; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] +; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] +; GLOBALNESS1-NEXT: s_mov_b32 s12, s44 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s101 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s100 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v42 +; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17] +; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 +; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, 
widget@rel32@hi+12 +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GLOBALNESS1-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock +; +; GLOBALNESS0-LABEL: kernel: +; GLOBALNESS0: ; %bb.0: ; %bb +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s16, 0 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s15, 1 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s10, 2 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s11, 3 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s6, 4 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s7, 5 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s4, 6 +; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[8:9] +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s5, 7 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v0 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v44, 0 +; GLOBALNESS0-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 +; GLOBALNESS0-NEXT: s_load_dwordx2 s[56:57], s[8:9], 0x8 +; GLOBALNESS0-NEXT: s_nop 0 +; GLOBALNESS0-NEXT: s_load_dword s8, s[8:9], 0x14 +; GLOBALNESS0-NEXT: s_nop 0 +; GLOBALNESS0-NEXT: s_load_dwordx2 s[6:7], s[38:39], 0x18 +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 +; GLOBALNESS0-NEXT: global_store_dword v[0:1], v44, off +; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) +; GLOBALNESS0-NEXT: global_load_dword v0, v44, s[4:5] +; GLOBALNESS0-NEXT: s_mov_b32 s61, 0 +; GLOBALNESS0-NEXT: s_mov_b32 s60, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s62, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s63, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s64, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s65, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s66, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s67, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s68, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s69, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s70, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s71, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s72, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s73, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s74, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s75, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s76, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s77, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s78, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s79, s61 +; 
GLOBALNESS0-NEXT: s_mov_b32 s80, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s81, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s82, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s83, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s84, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s85, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s86, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s87, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s88, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s89, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s90, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s91, s61 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a32, s60 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a33, s61 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a34, s62 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a35, s63 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a36, s64 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a37, s65 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a38, s66 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a39, s67 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a40, s68 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a41, s69 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a42, s70 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a43, s71 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a44, s72 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a45, s73 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a46, s74 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a47, s75 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a48, s76 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a49, s77 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a50, s78 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a51, s79 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a52, s80 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a53, s81 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a54, s82 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a55, s83 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a56, s84 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a57, s85 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a58, s86 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a59, s87 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a60, s88 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a61, s89 +; GLOBALNESS0-NEXT: 
v_accvgpr_write_b32 a62, s90 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a63, s91 +; GLOBALNESS0-NEXT: s_movk_i32 s60, 0x80 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s60, 8 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s61, 9 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s62, 10 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s63, 11 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s64, 12 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s65, 13 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s66, 14 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s67, 15 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s68, 16 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s69, 17 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s70, 18 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s71, 19 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s72, 20 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s73, 21 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s74, 22 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s75, 23 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s76, 24 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s77, 25 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s78, 26 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s79, 27 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s80, 28 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s81, 29 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s82, 30 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s83, 31 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s84, 32 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s85, 33 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s86, 34 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s87, 35 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s88, 36 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s89, 37 +; GLOBALNESS0-NEXT: s_add_u32 flat_scratch_lo, s12, s17 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, 0x40994400 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s90, 38 +; GLOBALNESS0-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s91, 39 +; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[6:7], v[44:45] +; GLOBALNESS0-NEXT: s_add_u32 s0, s0, s17 +; 
GLOBALNESS0-NEXT: v_writelane_b32 v41, s4, 40 +; GLOBALNESS0-NEXT: s_addc_u32 s1, s1, 0 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s5, 41 +; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[6:7], 0 +; GLOBALNESS0-NEXT: s_bitcmp1_b32 s56, 0 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s4, 42 +; GLOBALNESS0-NEXT: s_load_dword s9, s[38:39], 0x20 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s5, 43 +; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GLOBALNESS0-NEXT: s_xor_b64 s[36:37], s[4:5], -1 +; GLOBALNESS0-NEXT: s_bitcmp1_b32 s8, 0 +; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] +; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GLOBALNESS0-NEXT: s_xor_b64 s[34:35], s[4:5], -1 +; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) +; GLOBALNESS0-NEXT: s_bitcmp1_b32 s9, 0 +; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GLOBALNESS0-NEXT: s_getpc_b64 s[6:7] +; GLOBALNESS0-NEXT: s_add_u32 s6, s6, wobble@gotpcrel32@lo+4 +; GLOBALNESS0-NEXT: s_addc_u32 s7, s7, wobble@gotpcrel32@hi+12 +; GLOBALNESS0-NEXT: s_xor_b64 s[100:101], s[4:5], -1 +; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) +; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s4, 44 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s5, 45 +; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[4:5], 1, v0 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s4, 46 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s5, 47 +; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s4, 48 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s5, 49 +; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s4, 50 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[58:59], 1, v1 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s5, 51 +; GLOBALNESS0-NEXT: s_mov_b32 s45, 0x3ff00000 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s44, 52 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s56, 0 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s57, 1 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s58, 2 +; GLOBALNESS0-NEXT: 
v_writelane_b32 v42, s59, 3 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s60, 4 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s61, 5 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s62, 6 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s63, 7 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s64, 8 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s65, 9 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s45, 53 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s66, 10 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s46, 54 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s67, 11 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s47, 55 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s68, 12 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s48, 56 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s69, 13 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s49, 57 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s70, 14 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s50, 58 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s71, 15 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s51, 59 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s72, 16 +; GLOBALNESS0-NEXT: s_load_dwordx2 s[42:43], s[6:7], 0x0 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s52, 60 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s73, 17 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s53, 61 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s74, 18 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s54, 62 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s75, 19 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s55, 63 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s56, 20 +; GLOBALNESS0-NEXT: s_mov_b32 s33, s14 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s57, 21 +; GLOBALNESS0-NEXT: s_mov_b32 s32, 0 +; GLOBALNESS0-NEXT: s_branch .LBB1_4 +; GLOBALNESS0-NEXT: .LBB1_1: ; %bb70.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: v_readlane_b32 s6, v41, 50 +; GLOBALNESS0-NEXT: v_readlane_b32 s7, v41, 51 +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_29 +; GLOBALNESS0-NEXT: .LBB1_2: ; %Flow6 +; GLOBALNESS0-NEXT: ; in 
Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5] +; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], 0 +; GLOBALNESS0-NEXT: .LBB1_3: ; %Flow19 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a63, v31 +; GLOBALNESS0-NEXT: v_readlane_b32 s4, v42, 22 +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7] +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a62, v30 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a61, v29 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a60, v28 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a59, v27 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a58, v26 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a57, v25 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a56, v24 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a55, v23 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a54, v22 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a53, v21 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a52, v20 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a51, v19 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a50, v18 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a49, v17 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a48, v16 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a47, v15 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a46, v14 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a45, v13 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a44, v12 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a43, v11 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a42, v10 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a41, v9 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a40, v8 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a39, v7 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a38, v6 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a37, v5 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a36, v4 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a35, v3 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a34, v2 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a33, v1 +; GLOBALNESS0-NEXT: v_accvgpr_write_b32 a32, v0 +; GLOBALNESS0-NEXT: v_readlane_b32 s5, v42, 23 +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_30 +; 
GLOBALNESS0-NEXT: .LBB1_4: ; %bb5 +; GLOBALNESS0-NEXT: ; =>This Loop Header: Depth=1 +; GLOBALNESS0-NEXT: ; Child Loop BB1_17 Depth 2 +; GLOBALNESS0-NEXT: v_readlane_b32 s60, v41, 8 +; GLOBALNESS0-NEXT: v_readlane_b32 s61, v41, 9 +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[60:61], s[60:61] op_sel:[0,1] +; GLOBALNESS0-NEXT: flat_load_dword v40, v[0:1] +; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS0-NEXT: buffer_store_dword v44, off, s[0:3], 0 +; GLOBALNESS0-NEXT: flat_load_dword v46, v[0:1] +; GLOBALNESS0-NEXT: v_readlane_b32 s4, v41, 6 +; GLOBALNESS0-NEXT: v_readlane_b32 s6, v41, 4 +; GLOBALNESS0-NEXT: v_readlane_b32 s10, v41, 2 +; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS0-NEXT: v_readlane_b32 s5, v41, 7 +; GLOBALNESS0-NEXT: v_readlane_b32 s7, v41, 5 +; GLOBALNESS0-NEXT: v_readlane_b32 s11, v41, 3 +; GLOBALNESS0-NEXT: s_mov_b32 s12, s33 +; GLOBALNESS0-NEXT: v_readlane_b32 s13, v41, 1 +; GLOBALNESS0-NEXT: v_readlane_b32 s14, v41, 0 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v43 +; GLOBALNESS0-NEXT: v_readlane_b32 s62, v41, 10 +; GLOBALNESS0-NEXT: v_readlane_b32 s63, v41, 11 +; GLOBALNESS0-NEXT: v_readlane_b32 s64, v41, 12 +; GLOBALNESS0-NEXT: v_readlane_b32 s65, v41, 13 +; GLOBALNESS0-NEXT: v_readlane_b32 s66, v41, 14 +; GLOBALNESS0-NEXT: v_readlane_b32 s67, v41, 15 +; GLOBALNESS0-NEXT: v_readlane_b32 s68, v41, 16 +; GLOBALNESS0-NEXT: v_readlane_b32 s69, v41, 17 +; GLOBALNESS0-NEXT: v_readlane_b32 s70, v41, 18 +; GLOBALNESS0-NEXT: v_readlane_b32 s71, v41, 19 +; GLOBALNESS0-NEXT: v_readlane_b32 s72, v41, 20 +; GLOBALNESS0-NEXT: v_readlane_b32 s73, v41, 21 +; GLOBALNESS0-NEXT: v_readlane_b32 s74, v41, 22 +; GLOBALNESS0-NEXT: v_readlane_b32 s75, v41, 23 +; GLOBALNESS0-NEXT: v_readlane_b32 s76, v41, 24 +; GLOBALNESS0-NEXT: v_readlane_b32 s77, v41, 25 +; GLOBALNESS0-NEXT: v_readlane_b32 s78, v41, 26 +; GLOBALNESS0-NEXT: v_readlane_b32 s79, v41, 27 +; GLOBALNESS0-NEXT: v_readlane_b32 s80, v41, 28 +; GLOBALNESS0-NEXT: v_readlane_b32 s81, v41, 29 +; 
GLOBALNESS0-NEXT: v_readlane_b32 s82, v41, 30 +; GLOBALNESS0-NEXT: v_readlane_b32 s83, v41, 31 +; GLOBALNESS0-NEXT: v_readlane_b32 s84, v41, 32 +; GLOBALNESS0-NEXT: v_readlane_b32 s85, v41, 33 +; GLOBALNESS0-NEXT: v_readlane_b32 s86, v41, 34 +; GLOBALNESS0-NEXT: v_readlane_b32 s87, v41, 35 +; GLOBALNESS0-NEXT: v_readlane_b32 s88, v41, 36 +; GLOBALNESS0-NEXT: v_readlane_b32 s89, v41, 37 +; GLOBALNESS0-NEXT: v_readlane_b32 s90, v41, 38 +; GLOBALNESS0-NEXT: v_readlane_b32 s91, v41, 39 +; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[42:43] +; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5 +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[58:59] +; GLOBALNESS0-NEXT: ; kill: killed $sgpr4_sgpr5 +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1 +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_10 +; GLOBALNESS0-NEXT: ; %bb.5: ; %NodeBlock +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], -1 +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0 +; GLOBALNESS0-NEXT: s_cmp_lt_i32 s57, 1 +; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1 +; GLOBALNESS0-NEXT: s_cbranch_scc1 .LBB1_7 +; GLOBALNESS0-NEXT: ; %bb.6: ; %LeafBlock3 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_cmp_lg_u32 s57, 1 +; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], 0 +; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GLOBALNESS0-NEXT: .LBB1_7: ; %Flow17 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_9 +; GLOBALNESS0-NEXT: ; %bb.8: ; %LeafBlock +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_cmp_lg_u32 s57, 0 +; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], 0 +; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GLOBALNESS0-NEXT: .LBB1_9: ; %Flow18 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s8, 22 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s9, 23 +; GLOBALNESS0-NEXT: .LBB1_10: ; 
%Flow16 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: v_readlane_b32 s60, v41, 8 +; GLOBALNESS0-NEXT: v_readlane_b32 s61, v41, 9 +; GLOBALNESS0-NEXT: v_readlane_b32 s68, v41, 16 +; GLOBALNESS0-NEXT: v_readlane_b32 s69, v41, 17 +; GLOBALNESS0-NEXT: v_readlane_b32 s70, v41, 18 +; GLOBALNESS0-NEXT: v_readlane_b32 s71, v41, 19 +; GLOBALNESS0-NEXT: v_readlane_b32 s72, v41, 20 +; GLOBALNESS0-NEXT: v_readlane_b32 s73, v41, 21 +; GLOBALNESS0-NEXT: v_readlane_b32 s74, v41, 22 +; GLOBALNESS0-NEXT: v_readlane_b32 s75, v41, 23 +; GLOBALNESS0-NEXT: v_readlane_b32 s76, v41, 24 +; GLOBALNESS0-NEXT: v_readlane_b32 s77, v41, 25 +; GLOBALNESS0-NEXT: v_readlane_b32 s78, v41, 26 +; GLOBALNESS0-NEXT: v_readlane_b32 s79, v41, 27 +; GLOBALNESS0-NEXT: v_readlane_b32 s80, v41, 28 +; GLOBALNESS0-NEXT: v_readlane_b32 s81, v41, 29 +; GLOBALNESS0-NEXT: v_readlane_b32 s82, v41, 30 +; GLOBALNESS0-NEXT: v_readlane_b32 s83, v41, 31 +; GLOBALNESS0-NEXT: v_readlane_b32 s84, v41, 32 +; GLOBALNESS0-NEXT: v_readlane_b32 s85, v41, 33 +; GLOBALNESS0-NEXT: v_readlane_b32 s86, v41, 34 +; GLOBALNESS0-NEXT: v_readlane_b32 s87, v41, 35 +; GLOBALNESS0-NEXT: v_readlane_b32 s88, v41, 36 +; GLOBALNESS0-NEXT: v_readlane_b32 s89, v41, 37 +; GLOBALNESS0-NEXT: v_readlane_b32 s90, v41, 38 +; GLOBALNESS0-NEXT: v_readlane_b32 s91, v41, 39 +; GLOBALNESS0-NEXT: s_mov_b32 s68, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s69, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s70, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s71, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s72, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s73, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s74, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s75, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s76, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s77, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s78, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s79, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s80, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s81, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s82, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s83, s61 +; GLOBALNESS0-NEXT: 
s_mov_b32 s84, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s85, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s86, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s87, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s88, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s89, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s90, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s91, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s92, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s93, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s94, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s95, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s96, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s97, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s98, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s99, s61 +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[68:69], s[68:69] op_sel:[0,1] +; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1 +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[4:5] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], s[70:71], s[70:71] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[4:5], s[72:73], s[72:73] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[6:7], s[74:75], s[74:75] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[8:9], s[76:77], s[76:77] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[10:11], s[78:79], s[78:79] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[12:13], s[80:81], s[80:81] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[14:15], s[82:83], s[82:83] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[16:17], s[84:85], s[84:85] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[18:19], s[86:87], s[86:87] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[20:21], s[88:89], s[88:89] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[22:23], s[90:91], s[90:91] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[24:25], s[92:93], s[92:93] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[26:27], s[94:95], s[94:95] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[28:29], s[96:97], s[96:97] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[30:31], s[98:99], s[98:99] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_readlane_b32 s62, v41, 10 +; GLOBALNESS0-NEXT: 
v_readlane_b32 s63, v41, 11 +; GLOBALNESS0-NEXT: v_readlane_b32 s64, v41, 12 +; GLOBALNESS0-NEXT: v_readlane_b32 s65, v41, 13 +; GLOBALNESS0-NEXT: v_readlane_b32 s66, v41, 14 +; GLOBALNESS0-NEXT: v_readlane_b32 s67, v41, 15 +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_3 +; GLOBALNESS0-NEXT: ; %bb.11: ; %baz.exit.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 +; GLOBALNESS0-NEXT: flat_load_dword v0, v[0:1] +; GLOBALNESS0-NEXT: v_readlane_b32 s60, v41, 8 +; GLOBALNESS0-NEXT: v_readlane_b32 s64, v41, 12 +; GLOBALNESS0-NEXT: v_readlane_b32 s65, v41, 13 +; GLOBALNESS0-NEXT: v_readlane_b32 s66, v41, 14 +; GLOBALNESS0-NEXT: v_readlane_b32 s67, v41, 15 +; GLOBALNESS0-NEXT: v_readlane_b32 s68, v41, 16 +; GLOBALNESS0-NEXT: v_readlane_b32 s69, v41, 17 +; GLOBALNESS0-NEXT: v_readlane_b32 s70, v41, 18 +; GLOBALNESS0-NEXT: v_readlane_b32 s71, v41, 19 +; GLOBALNESS0-NEXT: v_readlane_b32 s72, v41, 20 +; GLOBALNESS0-NEXT: v_readlane_b32 s73, v41, 21 +; GLOBALNESS0-NEXT: v_readlane_b32 s74, v41, 22 +; GLOBALNESS0-NEXT: v_readlane_b32 s75, v41, 23 +; GLOBALNESS0-NEXT: v_readlane_b32 s76, v41, 24 +; GLOBALNESS0-NEXT: v_readlane_b32 s77, v41, 25 +; GLOBALNESS0-NEXT: v_readlane_b32 s78, v41, 26 +; GLOBALNESS0-NEXT: v_readlane_b32 s79, v41, 27 +; GLOBALNESS0-NEXT: v_readlane_b32 s80, v41, 28 +; GLOBALNESS0-NEXT: v_readlane_b32 s81, v41, 29 +; GLOBALNESS0-NEXT: v_readlane_b32 s82, v41, 30 +; GLOBALNESS0-NEXT: v_readlane_b32 s83, v41, 31 +; GLOBALNESS0-NEXT: v_readlane_b32 s84, v41, 32 +; GLOBALNESS0-NEXT: v_readlane_b32 s85, v41, 33 +; GLOBALNESS0-NEXT: v_readlane_b32 s86, v41, 34 +; GLOBALNESS0-NEXT: v_readlane_b32 s87, v41, 35 +; GLOBALNESS0-NEXT: v_readlane_b32 s88, v41, 36 +; GLOBALNESS0-NEXT: v_readlane_b32 s89, v41, 37 +; GLOBALNESS0-NEXT: v_readlane_b32 s90, v41, 38 +; GLOBALNESS0-NEXT: v_readlane_b32 s91, v41, 39 +; GLOBALNESS0-NEXT: v_readlane_b32 s64, v41, 52 +; GLOBALNESS0-NEXT: v_readlane_b32 s61, v41, 9 +; GLOBALNESS0-NEXT: 
v_readlane_b32 s65, v41, 53 +; GLOBALNESS0-NEXT: v_readlane_b32 s66, v41, 54 +; GLOBALNESS0-NEXT: v_readlane_b32 s67, v41, 55 +; GLOBALNESS0-NEXT: v_readlane_b32 s68, v41, 56 +; GLOBALNESS0-NEXT: v_readlane_b32 s69, v41, 57 +; GLOBALNESS0-NEXT: v_readlane_b32 s70, v41, 58 +; GLOBALNESS0-NEXT: v_readlane_b32 s71, v41, 59 +; GLOBALNESS0-NEXT: v_readlane_b32 s72, v41, 60 +; GLOBALNESS0-NEXT: v_readlane_b32 s73, v41, 61 +; GLOBALNESS0-NEXT: v_readlane_b32 s74, v41, 62 +; GLOBALNESS0-NEXT: v_readlane_b32 s75, v41, 63 +; GLOBALNESS0-NEXT: v_readlane_b32 s62, v41, 10 +; GLOBALNESS0-NEXT: v_readlane_b32 s63, v41, 11 +; GLOBALNESS0-NEXT: v_readlane_b32 s76, v42, 0 +; GLOBALNESS0-NEXT: v_readlane_b32 s77, v42, 1 +; GLOBALNESS0-NEXT: v_readlane_b32 s78, v42, 2 +; GLOBALNESS0-NEXT: v_readlane_b32 s79, v42, 3 +; GLOBALNESS0-NEXT: v_readlane_b32 s80, v42, 4 +; GLOBALNESS0-NEXT: v_readlane_b32 s81, v42, 5 +; GLOBALNESS0-NEXT: s_mov_b32 s64, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s66, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s67, s65 +; GLOBALNESS0-NEXT: s_mov_b32 s68, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s69, s65 +; GLOBALNESS0-NEXT: s_mov_b32 s70, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s71, s65 +; GLOBALNESS0-NEXT: s_mov_b32 s72, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s73, s65 +; GLOBALNESS0-NEXT: s_mov_b32 s74, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s75, s65 +; GLOBALNESS0-NEXT: s_mov_b32 s45, s65 +; GLOBALNESS0-NEXT: v_readlane_b32 s82, v42, 6 +; GLOBALNESS0-NEXT: v_readlane_b32 s83, v42, 7 +; GLOBALNESS0-NEXT: v_readlane_b32 s84, v42, 8 +; GLOBALNESS0-NEXT: v_readlane_b32 s85, v42, 9 +; GLOBALNESS0-NEXT: v_readlane_b32 s86, v42, 10 +; GLOBALNESS0-NEXT: v_readlane_b32 s87, v42, 11 +; GLOBALNESS0-NEXT: v_readlane_b32 s88, v42, 12 +; GLOBALNESS0-NEXT: v_readlane_b32 s89, v42, 13 +; GLOBALNESS0-NEXT: v_readlane_b32 s90, v42, 14 +; GLOBALNESS0-NEXT: v_readlane_b32 s91, v42, 15 +; GLOBALNESS0-NEXT: v_readlane_b32 s92, v42, 16 +; GLOBALNESS0-NEXT: v_readlane_b32 s93, v42, 17 +; GLOBALNESS0-NEXT: 
v_readlane_b32 s94, v42, 18 +; GLOBALNESS0-NEXT: v_readlane_b32 s95, v42, 19 +; GLOBALNESS0-NEXT: s_mov_b32 s76, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s77, s65 +; GLOBALNESS0-NEXT: s_mov_b32 s78, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s79, s65 +; GLOBALNESS0-NEXT: s_mov_b32 s80, s61 +; GLOBALNESS0-NEXT: s_mov_b32 s81, s65 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s44, 52 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s56, 0 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s57, 1 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s58, 2 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s59, 3 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s60, 4 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s61, 5 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s62, 6 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s63, 7 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s64, 8 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s45, 53 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s65, 9 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s46, 54 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s66, 10 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s47, 55 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s67, 11 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s48, 56 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s68, 12 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s49, 57 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s69, 13 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s50, 58 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s70, 14 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s51, 59 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s71, 15 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s52, 60 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s72, 16 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s53, 61 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s73, 17 +; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[96:97], 0, v0 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s54, 62 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s74, 18 +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[64:65], s[64:65] op_sel:[0,1] +; 
GLOBALNESS0-NEXT: v_writelane_b32 v41, s55, 63 +; GLOBALNESS0-NEXT: v_writelane_b32 v42, s75, 19 +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], s[66:67], s[66:67] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[4:5], s[68:69], s[68:69] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[6:7], s[70:71], s[70:71] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[8:9], s[72:73], s[72:73] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[10:11], s[74:75], s[74:75] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[12:13], s[76:77], s[76:77] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[14:15], s[78:79], s[78:79] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[16:17], s[80:81], s[80:81] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[18:19], s[82:83], s[82:83] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[20:21], s[84:85], s[84:85] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[22:23], s[86:87], s[86:87] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[24:25], s[88:89], s[88:89] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[26:27], s[90:91], s[90:91] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[28:29], s[92:93], s[92:93] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[30:31], s[94:95], s[94:95] op_sel:[0,1] +; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[40:41], s[96:97] +; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26 +; GLOBALNESS0-NEXT: ; %bb.12: ; %bb33.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 +; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[2:3], off +; GLOBALNESS0-NEXT: v_readlane_b32 s4, v41, 44 +; GLOBALNESS0-NEXT: v_readlane_b32 s5, v41, 45 +; GLOBALNESS0-NEXT: s_mov_b64 s[98:99], s[58:59] +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_14 +; GLOBALNESS0-NEXT: ; %bb.13: ; %bb39.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[44:45], off +; 
GLOBALNESS0-NEXT: .LBB1_14: ; %bb44.lr.ph.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v46 +; GLOBALNESS0-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc +; GLOBALNESS0-NEXT: v_readlane_b32 s60, v41, 40 +; GLOBALNESS0-NEXT: v_readlane_b32 s62, v41, 42 +; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) +; GLOBALNESS0-NEXT: v_cmp_nlt_f64_e64 s[56:57], 0, v[0:1] +; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[58:59], 0, v2 +; GLOBALNESS0-NEXT: v_readlane_b32 s61, v41, 41 +; GLOBALNESS0-NEXT: v_readlane_b32 s63, v41, 43 +; GLOBALNESS0-NEXT: s_branch .LBB1_17 +; GLOBALNESS0-NEXT: .LBB1_15: ; %Flow7 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5] +; GLOBALNESS0-NEXT: .LBB1_16: ; %bb63.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[100:101] +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_25 +; GLOBALNESS0-NEXT: .LBB1_17: ; %bb44.i +; GLOBALNESS0-NEXT: ; Parent Loop BB1_4 Depth=1 +; GLOBALNESS0-NEXT: ; => This Inner Loop Header: Depth=2 +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[36:37] +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_16 +; GLOBALNESS0-NEXT: ; %bb.18: ; %bb46.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[34:35] +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_16 +; GLOBALNESS0-NEXT: ; %bb.19: ; %bb50.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[60:61] +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_22 +; GLOBALNESS0-NEXT: ; %bb.20: ; %bb3.i.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[62:63] +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_22 +; GLOBALNESS0-NEXT: ; %bb.21: ; %bb6.i.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[56:57] +; GLOBALNESS0-NEXT: .LBB1_22: ; %spam.exit.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 
Depth=2 +; GLOBALNESS0-NEXT: v_readlane_b32 s4, v41, 46 +; GLOBALNESS0-NEXT: v_readlane_b32 s5, v41, 47 +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_16 +; GLOBALNESS0-NEXT: ; %bb.23: ; %bb55.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS0-NEXT: s_add_u32 s64, s38, 40 +; GLOBALNESS0-NEXT: v_readlane_b32 s46, v41, 6 +; GLOBALNESS0-NEXT: v_readlane_b32 s48, v41, 4 +; GLOBALNESS0-NEXT: v_readlane_b32 s44, v41, 2 +; GLOBALNESS0-NEXT: s_addc_u32 s65, s39, 0 +; GLOBALNESS0-NEXT: v_readlane_b32 s47, v41, 7 +; GLOBALNESS0-NEXT: v_readlane_b32 s49, v41, 5 +; GLOBALNESS0-NEXT: v_readlane_b32 s45, v41, 3 +; GLOBALNESS0-NEXT: v_readlane_b32 s50, v41, 1 +; GLOBALNESS0-NEXT: v_readlane_b32 s51, v41, 0 +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[46:47] +; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[48:49] +; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[64:65] +; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[44:45] +; GLOBALNESS0-NEXT: s_mov_b32 s12, s33 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s50 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s51 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v43 +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[42:43] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[46:47] +; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[48:49] +; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[64:65] +; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[44:45] +; GLOBALNESS0-NEXT: s_mov_b32 s12, s33 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s50 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s51 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v43 +; GLOBALNESS0-NEXT: global_store_dwordx2 v[0:1], a[32:33], off +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[42:43] +; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[58:59] +; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_15 +; GLOBALNESS0-NEXT: ; %bb.24: ; %bb62.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_17 Depth=2 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 +; 
GLOBALNESS0-NEXT: global_store_dwordx2 v[0:1], v[44:45], off +; GLOBALNESS0-NEXT: s_branch .LBB1_15 +; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow14 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: v_readlane_b32 s56, v41, 8 +; GLOBALNESS0-NEXT: v_readlane_b32 s57, v41, 9 +; GLOBALNESS0-NEXT: v_readlane_b32 s58, v41, 10 +; GLOBALNESS0-NEXT: v_readlane_b32 s59, v41, 11 +; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[56:57] +; GLOBALNESS0-NEXT: v_readlane_b32 s60, v41, 12 +; GLOBALNESS0-NEXT: v_readlane_b32 s61, v41, 13 +; GLOBALNESS0-NEXT: v_readlane_b32 s62, v41, 14 +; GLOBALNESS0-NEXT: v_readlane_b32 s63, v41, 15 +; GLOBALNESS0-NEXT: v_readlane_b32 s64, v41, 16 +; GLOBALNESS0-NEXT: v_readlane_b32 s65, v41, 17 +; GLOBALNESS0-NEXT: s_mov_b32 s56, s49 +; GLOBALNESS0-NEXT: s_mov_b32 s57, s49 +; GLOBALNESS0-NEXT: s_mov_b32 s58, s49 +; GLOBALNESS0-NEXT: s_mov_b32 s59, s49 +; GLOBALNESS0-NEXT: v_readlane_b32 s66, v41, 18 +; GLOBALNESS0-NEXT: v_readlane_b32 s67, v41, 19 +; GLOBALNESS0-NEXT: s_mov_b32 s60, s49 +; GLOBALNESS0-NEXT: s_mov_b32 s61, s49 +; GLOBALNESS0-NEXT: s_mov_b32 s62, s49 +; GLOBALNESS0-NEXT: s_mov_b32 s63, s49 +; GLOBALNESS0-NEXT: s_mov_b32 s64, s49 +; GLOBALNESS0-NEXT: s_mov_b32 s65, s49 +; GLOBALNESS0-NEXT: s_mov_b64 s[52:53], s[56:57] +; GLOBALNESS0-NEXT: s_mov_b32 s66, s49 +; GLOBALNESS0-NEXT: s_mov_b32 s67, s49 +; GLOBALNESS0-NEXT: s_mov_b64 s[54:55], s[58:59] +; GLOBALNESS0-NEXT: s_mov_b64 s[56:57], s[60:61] +; GLOBALNESS0-NEXT: s_mov_b64 s[58:59], s[62:63] +; GLOBALNESS0-NEXT: s_mov_b64 s[60:61], s[64:65] +; GLOBALNESS0-NEXT: v_readlane_b32 s68, v41, 20 +; GLOBALNESS0-NEXT: v_readlane_b32 s69, v41, 21 +; GLOBALNESS0-NEXT: v_readlane_b32 s70, v41, 22 +; GLOBALNESS0-NEXT: v_readlane_b32 s71, v41, 23 +; GLOBALNESS0-NEXT: v_readlane_b32 s72, v41, 24 +; GLOBALNESS0-NEXT: v_readlane_b32 s73, v41, 25 +; GLOBALNESS0-NEXT: v_readlane_b32 s74, v41, 26 +; GLOBALNESS0-NEXT: v_readlane_b32 s75, v41, 27 +; GLOBALNESS0-NEXT: v_readlane_b32 s76, v41, 
28 +; GLOBALNESS0-NEXT: v_readlane_b32 s77, v41, 29 +; GLOBALNESS0-NEXT: v_readlane_b32 s78, v41, 30 +; GLOBALNESS0-NEXT: v_readlane_b32 s79, v41, 31 +; GLOBALNESS0-NEXT: s_mov_b64 s[62:63], s[66:67] +; GLOBALNESS0-NEXT: v_readlane_b32 s80, v41, 32 +; GLOBALNESS0-NEXT: v_readlane_b32 s81, v41, 33 +; GLOBALNESS0-NEXT: v_readlane_b32 s82, v41, 34 +; GLOBALNESS0-NEXT: v_readlane_b32 s83, v41, 35 +; GLOBALNESS0-NEXT: v_readlane_b32 s84, v41, 36 +; GLOBALNESS0-NEXT: v_readlane_b32 s85, v41, 37 +; GLOBALNESS0-NEXT: v_readlane_b32 s86, v41, 38 +; GLOBALNESS0-NEXT: v_readlane_b32 s87, v41, 39 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s48, 8 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s49, 9 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s50, 10 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s51, 11 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s52, 12 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s53, 13 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s54, 14 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s55, 15 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s56, 16 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s57, 17 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s58, 18 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s59, 19 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s60, 20 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s61, 21 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s62, 22 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s63, 23 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s64, 24 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s65, 25 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s66, 26 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s67, 27 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s68, 28 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s69, 29 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s70, 30 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s71, 31 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s72, 32 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s73, 33 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s74, 34 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s75, 35 +; 
GLOBALNESS0-NEXT: v_writelane_b32 v41, s76, 36 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s77, 37 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s78, 38 +; GLOBALNESS0-NEXT: v_writelane_b32 v41, s79, 39 +; GLOBALNESS0-NEXT: v_readlane_b32 s64, v41, 8 +; GLOBALNESS0-NEXT: v_readlane_b32 s65, v41, 9 +; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[64:65] +; GLOBALNESS0-NEXT: s_mov_b32 s64, s49 +; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[52:53] +; GLOBALNESS0-NEXT: v_readlane_b32 s66, v41, 10 +; GLOBALNESS0-NEXT: v_readlane_b32 s67, v41, 11 +; GLOBALNESS0-NEXT: v_readlane_b32 s68, v41, 12 +; GLOBALNESS0-NEXT: v_readlane_b32 s69, v41, 13 +; GLOBALNESS0-NEXT: v_readlane_b32 s70, v41, 14 +; GLOBALNESS0-NEXT: v_readlane_b32 s71, v41, 15 +; GLOBALNESS0-NEXT: v_readlane_b32 s72, v41, 16 +; GLOBALNESS0-NEXT: v_readlane_b32 s73, v41, 17 +; GLOBALNESS0-NEXT: v_readlane_b32 s74, v41, 18 +; GLOBALNESS0-NEXT: v_readlane_b32 s75, v41, 19 +; GLOBALNESS0-NEXT: v_readlane_b32 s76, v41, 20 +; GLOBALNESS0-NEXT: v_readlane_b32 s77, v41, 21 +; GLOBALNESS0-NEXT: v_readlane_b32 s78, v41, 22 +; GLOBALNESS0-NEXT: v_readlane_b32 s79, v41, 23 +; GLOBALNESS0-NEXT: v_readlane_b32 s80, v41, 24 +; GLOBALNESS0-NEXT: v_readlane_b32 s81, v41, 25 +; GLOBALNESS0-NEXT: v_readlane_b32 s82, v41, 26 +; GLOBALNESS0-NEXT: v_readlane_b32 s83, v41, 27 +; GLOBALNESS0-NEXT: v_readlane_b32 s84, v41, 28 +; GLOBALNESS0-NEXT: v_readlane_b32 s85, v41, 29 +; GLOBALNESS0-NEXT: v_readlane_b32 s86, v41, 30 +; GLOBALNESS0-NEXT: v_readlane_b32 s87, v41, 31 +; GLOBALNESS0-NEXT: v_readlane_b32 s88, v41, 32 +; GLOBALNESS0-NEXT: v_readlane_b32 s89, v41, 33 +; GLOBALNESS0-NEXT: v_readlane_b32 s90, v41, 34 +; GLOBALNESS0-NEXT: v_readlane_b32 s91, v41, 35 +; GLOBALNESS0-NEXT: v_readlane_b32 s92, v41, 36 +; GLOBALNESS0-NEXT: v_readlane_b32 s93, v41, 37 +; GLOBALNESS0-NEXT: v_readlane_b32 s94, v41, 38 +; GLOBALNESS0-NEXT: v_readlane_b32 s95, v41, 39 +; GLOBALNESS0-NEXT: s_mov_b64 s[50:51], s[54:55] +; GLOBALNESS0-NEXT: s_mov_b64 s[52:53], 
s[56:57] +; GLOBALNESS0-NEXT: s_mov_b64 s[54:55], s[58:59] +; GLOBALNESS0-NEXT: s_mov_b64 s[56:57], s[60:61] +; GLOBALNESS0-NEXT: s_mov_b64 s[58:59], s[62:63] +; GLOBALNESS0-NEXT: s_mov_b32 s60, s64 +; GLOBALNESS0-NEXT: v_readlane_b32 s64, v41, 8 +; GLOBALNESS0-NEXT: v_readlane_b32 s65, v41, 9 +; GLOBALNESS0-NEXT: v_readlane_b32 s66, v41, 10 +; GLOBALNESS0-NEXT: v_readlane_b32 s67, v41, 11 +; GLOBALNESS0-NEXT: v_readlane_b32 s68, v41, 12 +; GLOBALNESS0-NEXT: v_readlane_b32 s69, v41, 13 +; GLOBALNESS0-NEXT: v_readlane_b32 s70, v41, 14 +; GLOBALNESS0-NEXT: v_readlane_b32 s71, v41, 15 +; GLOBALNESS0-NEXT: v_readlane_b32 s72, v41, 16 +; GLOBALNESS0-NEXT: v_readlane_b32 s73, v41, 17 +; GLOBALNESS0-NEXT: v_readlane_b32 s74, v41, 18 +; GLOBALNESS0-NEXT: v_readlane_b32 s75, v41, 19 +; GLOBALNESS0-NEXT: v_readlane_b32 s76, v41, 20 +; GLOBALNESS0-NEXT: v_readlane_b32 s77, v41, 21 +; GLOBALNESS0-NEXT: v_readlane_b32 s78, v41, 22 +; GLOBALNESS0-NEXT: v_readlane_b32 s79, v41, 23 +; GLOBALNESS0-NEXT: v_readlane_b32 s80, v41, 24 +; GLOBALNESS0-NEXT: v_readlane_b32 s81, v41, 25 +; GLOBALNESS0-NEXT: v_readlane_b32 s82, v41, 26 +; GLOBALNESS0-NEXT: v_readlane_b32 s83, v41, 27 +; GLOBALNESS0-NEXT: v_readlane_b32 s84, v41, 28 +; GLOBALNESS0-NEXT: v_readlane_b32 s85, v41, 29 +; GLOBALNESS0-NEXT: v_readlane_b32 s86, v41, 30 +; GLOBALNESS0-NEXT: v_readlane_b32 s87, v41, 31 +; GLOBALNESS0-NEXT: v_readlane_b32 s88, v41, 32 +; GLOBALNESS0-NEXT: v_readlane_b32 s89, v41, 33 +; GLOBALNESS0-NEXT: v_readlane_b32 s90, v41, 34 +; GLOBALNESS0-NEXT: v_readlane_b32 s91, v41, 35 +; GLOBALNESS0-NEXT: v_readlane_b32 s92, v41, 36 +; GLOBALNESS0-NEXT: v_readlane_b32 s93, v41, 37 +; GLOBALNESS0-NEXT: v_readlane_b32 s94, v41, 38 +; GLOBALNESS0-NEXT: v_readlane_b32 s95, v41, 39 +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[36:37] +; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[64:65] +; GLOBALNESS0-NEXT: v_readlane_b32 s64, v41, 8 +; GLOBALNESS0-NEXT: v_readlane_b32 s65, v41, 9 +; GLOBALNESS0-NEXT: s_mov_b32 s61, s37 
+; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[64:65] +; GLOBALNESS0-NEXT: s_mov_b64 s[44:45], s[48:49] +; GLOBALNESS0-NEXT: v_readlane_b32 s66, v41, 10 +; GLOBALNESS0-NEXT: v_readlane_b32 s67, v41, 11 +; GLOBALNESS0-NEXT: v_readlane_b32 s68, v41, 12 +; GLOBALNESS0-NEXT: v_readlane_b32 s69, v41, 13 +; GLOBALNESS0-NEXT: v_readlane_b32 s70, v41, 14 +; GLOBALNESS0-NEXT: v_readlane_b32 s71, v41, 15 +; GLOBALNESS0-NEXT: v_readlane_b32 s72, v41, 16 +; GLOBALNESS0-NEXT: v_readlane_b32 s73, v41, 17 +; GLOBALNESS0-NEXT: v_readlane_b32 s74, v41, 18 +; GLOBALNESS0-NEXT: v_readlane_b32 s75, v41, 19 +; GLOBALNESS0-NEXT: v_readlane_b32 s76, v41, 20 +; GLOBALNESS0-NEXT: v_readlane_b32 s77, v41, 21 +; GLOBALNESS0-NEXT: v_readlane_b32 s78, v41, 22 +; GLOBALNESS0-NEXT: v_readlane_b32 s79, v41, 23 +; GLOBALNESS0-NEXT: v_readlane_b32 s80, v41, 24 +; GLOBALNESS0-NEXT: v_readlane_b32 s81, v41, 25 +; GLOBALNESS0-NEXT: v_readlane_b32 s82, v41, 26 +; GLOBALNESS0-NEXT: v_readlane_b32 s83, v41, 27 +; GLOBALNESS0-NEXT: v_readlane_b32 s84, v41, 28 +; GLOBALNESS0-NEXT: v_readlane_b32 s85, v41, 29 +; GLOBALNESS0-NEXT: v_readlane_b32 s86, v41, 30 +; GLOBALNESS0-NEXT: v_readlane_b32 s87, v41, 31 +; GLOBALNESS0-NEXT: v_readlane_b32 s88, v41, 32 +; GLOBALNESS0-NEXT: v_readlane_b32 s89, v41, 33 +; GLOBALNESS0-NEXT: v_readlane_b32 s90, v41, 34 +; GLOBALNESS0-NEXT: v_readlane_b32 s91, v41, 35 +; GLOBALNESS0-NEXT: s_mov_b32 s62, s37 +; GLOBALNESS0-NEXT: s_mov_b64 s[46:47], s[50:51] +; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[52:53] +; GLOBALNESS0-NEXT: s_mov_b64 s[50:51], s[54:55] +; GLOBALNESS0-NEXT: s_mov_b64 s[52:53], s[56:57] +; GLOBALNESS0-NEXT: s_mov_b64 s[54:55], s[58:59] +; GLOBALNESS0-NEXT: s_mov_b64 s[56:57], s[60:61] +; GLOBALNESS0-NEXT: s_mov_b32 s58, s62 +; GLOBALNESS0-NEXT: v_readlane_b32 s60, v41, 8 +; GLOBALNESS0-NEXT: v_readlane_b32 s61, v41, 9 +; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[60:61] +; GLOBALNESS0-NEXT: v_readlane_b32 s92, v41, 36 +; GLOBALNESS0-NEXT: v_readlane_b32 s93, v41, 37 
+; GLOBALNESS0-NEXT: v_readlane_b32 s94, v41, 38 +; GLOBALNESS0-NEXT: v_readlane_b32 s95, v41, 39 +; GLOBALNESS0-NEXT: v_readlane_b32 s62, v41, 10 +; GLOBALNESS0-NEXT: v_readlane_b32 s63, v41, 11 +; GLOBALNESS0-NEXT: v_readlane_b32 s64, v41, 12 +; GLOBALNESS0-NEXT: v_readlane_b32 s65, v41, 13 +; GLOBALNESS0-NEXT: v_readlane_b32 s66, v41, 14 +; GLOBALNESS0-NEXT: v_readlane_b32 s67, v41, 15 +; GLOBALNESS0-NEXT: v_readlane_b32 s68, v41, 16 +; GLOBALNESS0-NEXT: v_readlane_b32 s69, v41, 17 +; GLOBALNESS0-NEXT: v_readlane_b32 s70, v41, 18 +; GLOBALNESS0-NEXT: v_readlane_b32 s71, v41, 19 +; GLOBALNESS0-NEXT: v_readlane_b32 s72, v41, 20 +; GLOBALNESS0-NEXT: v_readlane_b32 s73, v41, 21 +; GLOBALNESS0-NEXT: v_readlane_b32 s74, v41, 22 +; GLOBALNESS0-NEXT: v_readlane_b32 s75, v41, 23 +; GLOBALNESS0-NEXT: v_readlane_b32 s76, v41, 24 +; GLOBALNESS0-NEXT: v_readlane_b32 s77, v41, 25 +; GLOBALNESS0-NEXT: v_readlane_b32 s78, v41, 26 +; GLOBALNESS0-NEXT: v_readlane_b32 s79, v41, 27 +; GLOBALNESS0-NEXT: v_readlane_b32 s80, v41, 28 +; GLOBALNESS0-NEXT: v_readlane_b32 s81, v41, 29 +; GLOBALNESS0-NEXT: v_readlane_b32 s82, v41, 30 +; GLOBALNESS0-NEXT: v_readlane_b32 s83, v41, 31 +; GLOBALNESS0-NEXT: v_readlane_b32 s84, v41, 32 +; GLOBALNESS0-NEXT: v_readlane_b32 s85, v41, 33 +; GLOBALNESS0-NEXT: v_readlane_b32 s86, v41, 34 +; GLOBALNESS0-NEXT: v_readlane_b32 s87, v41, 35 +; GLOBALNESS0-NEXT: v_readlane_b32 s88, v41, 36 +; GLOBALNESS0-NEXT: v_readlane_b32 s89, v41, 37 +; GLOBALNESS0-NEXT: v_readlane_b32 s90, v41, 38 +; GLOBALNESS0-NEXT: v_readlane_b32 s91, v41, 39 +; GLOBALNESS0-NEXT: s_mov_b32 s59, s37 +; GLOBALNESS0-NEXT: s_mov_b64 s[62:63], s[58:59] +; GLOBALNESS0-NEXT: v_readlane_b32 s64, v41, 8 +; GLOBALNESS0-NEXT: s_mov_b64 s[60:61], s[56:57] +; GLOBALNESS0-NEXT: v_readlane_b32 s65, v41, 9 +; GLOBALNESS0-NEXT: v_readlane_b32 s66, v41, 10 +; GLOBALNESS0-NEXT: v_readlane_b32 s67, v41, 11 +; GLOBALNESS0-NEXT: v_readlane_b32 s68, v41, 12 +; GLOBALNESS0-NEXT: v_readlane_b32 s69, v41, 13 
+; GLOBALNESS0-NEXT: v_readlane_b32 s70, v41, 14 +; GLOBALNESS0-NEXT: v_readlane_b32 s71, v41, 15 +; GLOBALNESS0-NEXT: v_readlane_b32 s72, v41, 16 +; GLOBALNESS0-NEXT: v_readlane_b32 s73, v41, 17 +; GLOBALNESS0-NEXT: v_readlane_b32 s74, v41, 18 +; GLOBALNESS0-NEXT: v_readlane_b32 s75, v41, 19 +; GLOBALNESS0-NEXT: s_mov_b64 s[58:59], s[54:55] +; GLOBALNESS0-NEXT: s_mov_b64 s[56:57], s[52:53] +; GLOBALNESS0-NEXT: s_mov_b64 s[54:55], s[50:51] +; GLOBALNESS0-NEXT: s_mov_b64 s[52:53], s[48:49] +; GLOBALNESS0-NEXT: s_mov_b64 s[50:51], s[46:47] +; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[44:45] +; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[64:65] +; GLOBALNESS0-NEXT: s_mov_b64 s[74:75], s[62:63] +; GLOBALNESS0-NEXT: v_readlane_b32 s76, v41, 20 +; GLOBALNESS0-NEXT: s_mov_b64 s[72:73], s[60:61] +; GLOBALNESS0-NEXT: s_mov_b64 s[70:71], s[58:59] +; GLOBALNESS0-NEXT: s_mov_b64 s[68:69], s[56:57] +; GLOBALNESS0-NEXT: s_mov_b64 s[66:67], s[54:55] +; GLOBALNESS0-NEXT: s_mov_b64 s[64:65], s[52:53] +; GLOBALNESS0-NEXT: s_mov_b64 s[62:63], s[50:51] +; GLOBALNESS0-NEXT: s_mov_b64 s[60:61], s[48:49] +; GLOBALNESS0-NEXT: v_readlane_b32 s77, v41, 21 +; GLOBALNESS0-NEXT: v_readlane_b32 s78, v41, 22 +; GLOBALNESS0-NEXT: v_readlane_b32 s79, v41, 23 +; GLOBALNESS0-NEXT: v_readlane_b32 s80, v41, 24 +; GLOBALNESS0-NEXT: v_readlane_b32 s81, v41, 25 +; GLOBALNESS0-NEXT: v_readlane_b32 s82, v41, 26 +; GLOBALNESS0-NEXT: v_readlane_b32 s83, v41, 27 +; GLOBALNESS0-NEXT: v_readlane_b32 s84, v41, 28 +; GLOBALNESS0-NEXT: v_readlane_b32 s85, v41, 29 +; GLOBALNESS0-NEXT: v_readlane_b32 s86, v41, 30 +; GLOBALNESS0-NEXT: v_readlane_b32 s87, v41, 31 +; GLOBALNESS0-NEXT: v_readlane_b32 s88, v41, 32 +; GLOBALNESS0-NEXT: v_readlane_b32 s89, v41, 33 +; GLOBALNESS0-NEXT: v_readlane_b32 s90, v41, 34 +; GLOBALNESS0-NEXT: v_readlane_b32 s91, v41, 35 +; GLOBALNESS0-NEXT: v_readlane_b32 s92, v41, 36 +; GLOBALNESS0-NEXT: v_readlane_b32 s93, v41, 37 +; GLOBALNESS0-NEXT: v_readlane_b32 s94, v41, 38 +; GLOBALNESS0-NEXT: 
v_readlane_b32 s95, v41, 39 +; GLOBALNESS0-NEXT: s_mov_b32 s76, s37 +; GLOBALNESS0-NEXT: s_mov_b64 s[44:45], s[60:61] +; GLOBALNESS0-NEXT: s_mov_b64 s[46:47], s[62:63] +; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[64:65] +; GLOBALNESS0-NEXT: s_mov_b64 s[50:51], s[66:67] +; GLOBALNESS0-NEXT: s_mov_b64 s[52:53], s[68:69] +; GLOBALNESS0-NEXT: s_mov_b64 s[54:55], s[70:71] +; GLOBALNESS0-NEXT: s_mov_b64 s[56:57], s[72:73] +; GLOBALNESS0-NEXT: s_mov_b64 s[58:59], s[74:75] +; GLOBALNESS0-NEXT: s_mov_b32 s60, s76 +; GLOBALNESS0-NEXT: v_readlane_b32 s64, v41, 8 +; GLOBALNESS0-NEXT: v_readlane_b32 s65, v41, 9 +; GLOBALNESS0-NEXT: v_readlane_b32 s66, v41, 10 +; GLOBALNESS0-NEXT: v_readlane_b32 s67, v41, 11 +; GLOBALNESS0-NEXT: v_readlane_b32 s68, v41, 12 +; GLOBALNESS0-NEXT: v_readlane_b32 s69, v41, 13 +; GLOBALNESS0-NEXT: v_readlane_b32 s70, v41, 14 +; GLOBALNESS0-NEXT: v_readlane_b32 s71, v41, 15 +; GLOBALNESS0-NEXT: v_readlane_b32 s72, v41, 16 +; GLOBALNESS0-NEXT: v_readlane_b32 s73, v41, 17 +; GLOBALNESS0-NEXT: v_readlane_b32 s74, v41, 18 +; GLOBALNESS0-NEXT: v_readlane_b32 s75, v41, 19 +; GLOBALNESS0-NEXT: s_mov_b32 s61, s65 +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], s[44:45], s[44:45] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], s[46:47], s[46:47] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[4:5], s[48:49], s[48:49] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[6:7], s[50:51], s[50:51] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[8:9], s[52:53], s[52:53] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[10:11], s[54:55], s[54:55] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[12:13], s[56:57], s[56:57] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[14:15], s[58:59], s[58:59] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[16:17], s[60:61], s[60:61] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[18:19], s[62:63], s[62:63] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[20:21], s[64:65], s[64:65] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 
v[22:23], s[66:67], s[66:67] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[24:25], s[68:69], s[68:69] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[26:27], s[70:71], s[70:71] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[28:29], s[72:73], s[72:73] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[30:31], s[74:75], s[74:75] op_sel:[0,1] +; GLOBALNESS0-NEXT: v_readlane_b32 s56, v42, 20 +; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[4:5] +; GLOBALNESS0-NEXT: v_readlane_b32 s57, v42, 21 +; GLOBALNESS0-NEXT: s_mov_b64 s[58:59], s[98:99] +; GLOBALNESS0-NEXT: v_readlane_b32 s76, v41, 20 +; GLOBALNESS0-NEXT: v_readlane_b32 s77, v41, 21 +; GLOBALNESS0-NEXT: v_readlane_b32 s78, v41, 22 +; GLOBALNESS0-NEXT: v_readlane_b32 s79, v41, 23 +; GLOBALNESS0-NEXT: v_readlane_b32 s80, v41, 24 +; GLOBALNESS0-NEXT: v_readlane_b32 s81, v41, 25 +; GLOBALNESS0-NEXT: v_readlane_b32 s82, v41, 26 +; GLOBALNESS0-NEXT: v_readlane_b32 s83, v41, 27 +; GLOBALNESS0-NEXT: v_readlane_b32 s84, v41, 28 +; GLOBALNESS0-NEXT: v_readlane_b32 s85, v41, 29 +; GLOBALNESS0-NEXT: v_readlane_b32 s86, v41, 30 +; GLOBALNESS0-NEXT: v_readlane_b32 s87, v41, 31 +; GLOBALNESS0-NEXT: v_readlane_b32 s88, v41, 32 +; GLOBALNESS0-NEXT: v_readlane_b32 s89, v41, 33 +; GLOBALNESS0-NEXT: v_readlane_b32 s90, v41, 34 +; GLOBALNESS0-NEXT: v_readlane_b32 s91, v41, 35 +; GLOBALNESS0-NEXT: v_readlane_b32 s92, v41, 36 +; GLOBALNESS0-NEXT: v_readlane_b32 s93, v41, 37 +; GLOBALNESS0-NEXT: v_readlane_b32 s94, v41, 38 +; GLOBALNESS0-NEXT: v_readlane_b32 s95, v41, 39 +; GLOBALNESS0-NEXT: .LBB1_26: ; %Flow15 +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[40:41] +; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[96:97] +; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_2 +; GLOBALNESS0-NEXT: ; %bb.27: ; %bb67.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: v_readlane_b32 s6, v41, 48 +; GLOBALNESS0-NEXT: v_readlane_b32 s7, v41, 49 +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, 
exec, s[6:7] +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_1 +; GLOBALNESS0-NEXT: ; %bb.28: ; %bb69.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[32:33], 0, 0 +; GLOBALNESS0-NEXT: global_store_dwordx2 v[32:33], v[44:45], off +; GLOBALNESS0-NEXT: s_branch .LBB1_1 +; GLOBALNESS0-NEXT: .LBB1_29: ; %bb73.i +; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v45, v44 +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[32:33], 0, 0 +; GLOBALNESS0-NEXT: global_store_dwordx2 v[32:33], v[44:45], off +; GLOBALNESS0-NEXT: s_branch .LBB1_2 +; GLOBALNESS0-NEXT: .LBB1_30: ; %loop.exit.guard +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1 +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_32 +; GLOBALNESS0-NEXT: ; %bb.31: ; %bb7.i.i +; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS0-NEXT: v_readlane_b32 s4, v41, 6 +; GLOBALNESS0-NEXT: v_readlane_b32 s6, v41, 4 +; GLOBALNESS0-NEXT: v_readlane_b32 s10, v41, 2 +; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS0-NEXT: v_readlane_b32 s5, v41, 7 +; GLOBALNESS0-NEXT: v_readlane_b32 s7, v41, 5 +; GLOBALNESS0-NEXT: v_readlane_b32 s11, v41, 3 +; GLOBALNESS0-NEXT: s_mov_b32 s12, s33 +; GLOBALNESS0-NEXT: v_readlane_b32 s13, v41, 1 +; GLOBALNESS0-NEXT: v_readlane_b32 s14, v41, 0 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v43 +; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17] +; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 +; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 +; GLOBALNESS0-NEXT: s_mov_b32 s34, s33 +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GLOBALNESS0-NEXT: s_mov_b32 s33, s34 +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0 +; GLOBALNESS0-NEXT: .LBB1_32: ; %Flow +; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_34 +; GLOBALNESS0-NEXT: ; %bb.33: ; %bb11.i.i +; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS0-NEXT: 
v_readlane_b32 s4, v41, 6 +; GLOBALNESS0-NEXT: v_readlane_b32 s6, v41, 4 +; GLOBALNESS0-NEXT: v_readlane_b32 s10, v41, 2 +; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS0-NEXT: v_readlane_b32 s5, v41, 7 +; GLOBALNESS0-NEXT: v_readlane_b32 s7, v41, 5 +; GLOBALNESS0-NEXT: v_readlane_b32 s11, v41, 3 +; GLOBALNESS0-NEXT: s_mov_b32 s12, s33 +; GLOBALNESS0-NEXT: v_readlane_b32 s13, v41, 1 +; GLOBALNESS0-NEXT: v_readlane_b32 s14, v41, 0 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v43 +; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17] +; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 +; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GLOBALNESS0-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock +bb: + store i32 0, i32 addrspace(1)* null, align 4 + %tmp4 = load i32, i32 addrspace(1)* %arg1.global, align 4 + br label %bb5 + +bb5: ; preds = %bb5.backedge, %bb + %tmp4.i.sroa.0.0 = phi <9 x double> [ undef, %bb ], [ %tmp4.i.sroa.0.1, %bb5.backedge ] + %tmp14.1.i = load i32, i32* inttoptr (i64 128 to i32*), align 128 + store i32 0, i32 addrspace(5)* null, align 4 + %tmp14.2.i = load i32, i32* inttoptr (i64 128 to i32*), align 128 + %tmp15.2.i = icmp eq i32 %tmp14.2.i, 0 + %spec.select.2.i = select i1 %tmp15.2.i, i32 0, i32 %tmp14.1.i + tail call void @wobble() + br i1 %tmp3.i.i, label %bb4.i.i, label %baz.exit.i + +bb4.i.i: ; preds = %bb5 + switch i32 %tmp5.i.i, label %baz.exit.i [ + i32 0, label %bb7.i.i + i32 1, label %bb11.i.i + ] + +bb7.i.i: ; preds = %bb4.i.i + tail call fastcc void @widget() + unreachable + +bb11.i.i: ; preds = %bb4.i.i + tail call fastcc void @widget() + unreachable + +baz.exit.i: ; preds = %bb4.i.i, %bb5 + %tmp26.i = load i32, i32* null, align 4 + %tmp27.i4 = load double, double addrspace(1)* null, align 8 + %tmp31.i = icmp slt i32 %tmp26.i, 0 + br i1 %tmp31.i, label %bb33.i, label %bb64.i + +bb33.i: ; preds = %baz.exit.i + %tmp38.i = icmp slt i32 %tmp4, 0 + br i1 %tmp38.i, label %bb39.i, 
label %bb44.lr.ph.i + +bb39.i: ; preds = %bb33.i + store double 0.000000e+00, double addrspace(1)* null, align 8 + br label %bb44.lr.ph.i + +bb44.lr.ph.i: ; preds = %bb39.i, %bb33.i + br label %bb44.i + +bb44.i: ; preds = %bb63.i, %bb44.lr.ph.i + br i1 %tmp3.i.i, label %bb63.i, label %bb46.i + +bb46.i: ; preds = %bb44.i + br i1 %tmp438.i, label %bb63.i, label %bb50.i + +bb50.i: ; preds = %bb46.i + switch i32 0, label %spam.exit.i [ + i32 0, label %bb1.i.i + ] + +bb1.i.i: ; preds = %bb50.i + %tmp2.i.i = fcmp ogt double %tmp27.i, 1.617000e+03 + br i1 %tmp2.i.i, label %spam.exit.i, label %bb3.i.i + +bb3.i.i: ; preds = %bb1.i.i + %tmp4.i.i = fcmp ogt double %tmp27.i, 0.000000e+00 + br i1 %tmp4.i.i, label %spam.exit.i, label %bb6.i.i + +bb6.i.i: ; preds = %bb3.i.i + %tmp7.i.i = fcmp ogt double %tmp27.i4, 0.000000e+00 + br i1 %tmp7.i.i, label %spam.exit.i, label %bb8.i.i + +bb8.i.i: ; preds = %bb6.i.i + tail call void null() + br label %spam.exit.i + +spam.exit.i: ; preds = %bb8.i.i, %bb6.i.i, %bb3.i.i, %bb1.i.i, %bb50.i + %tmp22.i = icmp sgt i32 %tmp4, 0 + br i1 %tmp22.i, label %bb63.i, label %bb55.i + +bb55.i: ; preds = %spam.exit.i + tail call void @wobble() + %tmp0 = extractelement <9 x double> %tmp4.i.sroa.0.0, i32 0 + store double %tmp0, double addrspace(1)* null, align 8 + tail call void @wobble() + %tmp61.i = icmp eq i32 %spec.select.2.i, 0 + br i1 %tmp61.i, label %bb62.i, label %bb63.i + +bb62.i: ; preds = %bb55.i + store double 0.000000e+00, double addrspace(1)* null, align 8 + br label %bb63.i + +bb63.i: ; preds = %bb62.i, %bb55.i, %spam.exit.i, %bb46.i, %bb44.i + br i1 %tmp48.i, label %bb44.i, label %bb64.i + +bb64.i: ; preds = %bb63.i, %baz.exit.i + %tmp4.i.sroa.0.1 = phi <9 x double> [ , %baz.exit.i ], [ zeroinitializer, %bb63.i ] + br i1 %tmp31.i, label %bb67.i, label %bb5.backedge + +bb5.backedge: ; preds = %bb73.i, %bb70.i, %bb64.i + br label %bb5 + +bb67.i: ; preds = %bb64.i + %tmp68.i = icmp eq i32 %tmp4, 1 + br i1 %tmp68.i, label %bb69.i, label 
%bb70.i + +bb69.i: ; preds = %bb67.i + store double 0.000000e+00, double addrspace(1)* null, align 8 + br label %bb70.i + +bb70.i: ; preds = %bb69.i, %bb67.i + %tmp3.i.i2 = icmp eq i32 %tmp4, 0 + br i1 %tmp3.i.i2, label %bb73.i, label %bb5.backedge + +bb73.i: ; preds = %bb70.i + store double 0.000000e+00, double addrspace(1)* null, align 8 + br label %bb5.backedge +} Index: llvm/test/CodeGen/Thumb2/mve-vst3.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vst3.ll +++ llvm/test/CodeGen/Thumb2/mve-vst3.ll @@ -184,13 +184,13 @@ ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: vmov.f32 s2, s14 ; CHECK-NEXT: vmov.f32 s3, s18 +; CHECK-NEXT: vmov.f32 s21, s7 ; CHECK-NEXT: vstrw.32 q0, [sp, #96] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [sp, #144] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s21, s7 -; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload -; CHECK-NEXT: vmov.f64 d0, d4 ; CHECK-NEXT: vstrw.32 q5, [r1, #32] ; CHECK-NEXT: vmov.f32 s22, s11 +; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload +; CHECK-NEXT: vmov.f64 d0, d4 ; CHECK-NEXT: vmov.f32 s19, s10 ; CHECK-NEXT: vldrw.u32 q2, [sp, #80] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s21, s7 @@ -203,44 +203,45 @@ ; CHECK-NEXT: vmov.f32 s16, s1 ; CHECK-NEXT: vmov.f32 s13, s0 ; CHECK-NEXT: vldrw.u32 q0, [sp, #64] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s18, s6 -; CHECK-NEXT: vmov.f32 s15, s5 -; CHECK-NEXT: vmov.f32 s5, s27 -; CHECK-NEXT: vmov.f32 s8, s24 -; CHECK-NEXT: vmov.f32 s6, s3 -; CHECK-NEXT: vmov.f32 s9, s0 -; CHECK-NEXT: vmov.f32 s24, s1 -; CHECK-NEXT: vmov.f32 s27, s2 -; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload ; CHECK-NEXT: vmov r0, r3, d14 ; CHECK-NEXT: vldrw.u32 q7, [sp, #48] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s7, s11 -; CHECK-NEXT: vstrw.32 q0, [r1, #128] +; CHECK-NEXT: vmov.f32 s8, s24 +; CHECK-NEXT: vmov.f32 s9, s0 ; CHECK-NEXT: vmov.f32 s11, s25 -; CHECK-NEXT: vldrw.u32 q0, [sp, #96] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s20, s12 -; 
CHECK-NEXT: vmov.32 q6[1], r3 ; CHECK-NEXT: vmov.f32 s12, s4 -; CHECK-NEXT: vstrw.32 q6, [r1, #64] ; CHECK-NEXT: vmov.f32 s4, s10 ; CHECK-NEXT: vmov.32 q2[2], r0 ; CHECK-NEXT: vmov r0, lr, d14 ; CHECK-NEXT: vldrw.u32 q7, [sp, #144] @ 16-byte Reload -; CHECK-NEXT: vmov.32 q0[1], lr +; CHECK-NEXT: vmov.f32 s18, s6 ; CHECK-NEXT: vmov.32 q5[2], r0 -; CHECK-NEXT: vstrw.32 q0, [r1, #160] -; CHECK-NEXT: vldrw.u32 q0, [sp, #112] @ 16-byte Reload -; CHECK-NEXT: vmov r2, r4, d14 +; CHECK-NEXT: vmov.f64 d12, d14 ; CHECK-NEXT: vstrw.32 q2, [r1, #48] -; CHECK-NEXT: vstrw.32 q0, [r1, #176] -; CHECK-NEXT: vldrw.u32 q0, [sp, #128] @ 16-byte Reload +; CHECK-NEXT: vstrw.32 q5, [r1, #144] +; CHECK-NEXT: vmov.f32 s15, s5 +; CHECK-NEXT: vmov.f32 s5, s27 +; CHECK-NEXT: vmov.f32 s6, s3 +; CHECK-NEXT: vmov.f32 s24, s1 +; CHECK-NEXT: vstrw.32 q1, [r1, #80] +; CHECK-NEXT: vmov.f32 s27, s2 +; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload +; CHECK-NEXT: vmov r2, r4, d14 +; CHECK-NEXT: vmov.32 q6[1], r3 +; CHECK-NEXT: vstrw.32 q0, [r1, #128] +; CHECK-NEXT: vldrw.u32 q0, [sp, #96] @ 16-byte Reload ; CHECK-NEXT: vmov.32 q3[2], r2 ; CHECK-NEXT: vmov.32 q4[1], r4 -; CHECK-NEXT: vmov.32 q0[2], r12 -; CHECK-NEXT: vstrw.32 q1, [r1, #80] +; CHECK-NEXT: vmov.32 q0[1], lr +; CHECK-NEXT: vstrw.32 q6, [r1, #64] +; CHECK-NEXT: vstrw.32 q0, [r1, #160] +; CHECK-NEXT: vldrw.u32 q0, [sp, #112] @ 16-byte Reload ; CHECK-NEXT: vstrw.32 q3, [r1, #96] ; CHECK-NEXT: vstrw.32 q4, [r1, #112] -; CHECK-NEXT: vstrw.32 q5, [r1, #144] +; CHECK-NEXT: vstrw.32 q0, [r1, #176] +; CHECK-NEXT: vldrw.u32 q0, [sp, #128] @ 16-byte Reload +; CHECK-NEXT: vmov.32 q0[2], r12 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: add sp, #160 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} Index: llvm/test/CodeGen/Thumb2/mve-vst4.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vst4.ll +++ llvm/test/CodeGen/Thumb2/mve-vst4.ll @@ -122,39 +122,29 @@ ; CHECK-NEXT: sub sp, #192 
; CHECK-NEXT: vldrw.u32 q0, [r0, #16] ; CHECK-NEXT: add r2, sp, #64 -; CHECK-NEXT: vldrw.u32 q1, [r0, #80] +; CHECK-NEXT: vldrw.u32 q4, [r0, #176] ; CHECK-NEXT: vldrw.u32 q3, [r0, #208] ; CHECK-NEXT: vldrw.u32 q2, [r0, #144] +; CHECK-NEXT: vldrw.u32 q1, [r0, #80] ; CHECK-NEXT: vstmia r2, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill -; CHECK-NEXT: vldrw.u32 q1, [r0, #64] -; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vldrw.u32 q3, [r0, #192] -; CHECK-NEXT: vldrw.u32 q2, [r0, #128] -; CHECK-NEXT: vldrw.u32 q4, [r0, #240] -; CHECK-NEXT: vstmia r2, {d2, d3, d4, d5, d6, d7, d8, d9} @ 64-byte Spill -; CHECK-NEXT: add r2, sp, #128 ; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldmia r2, {d8, d9, d10, d11, d12, d13, d14, d15} @ 64-byte Reload -; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vldrw.u32 q6, [r0, #176] -; CHECK-NEXT: vstmia r2, {d8, d9, d10, d11, d12, d13, d14, d15} @ 64-byte Spill -; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vldmia r2, {d8, d9, d10, d11, d12, d13, d14, d15} @ 64-byte Reload -; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vldrw.u32 q5, [r0, #112] -; CHECK-NEXT: vstmia r2, {d8, d9, d10, d11, d12, d13, d14, d15} @ 64-byte Spill -; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vldmia r2, {d8, d9, d10, d11, d12, d13, d14, d15} @ 64-byte Reload -; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vldrw.u32 q4, [r0, #48] +; CHECK-NEXT: vldrw.u32 q2, [r0, #128] +; CHECK-NEXT: vldrw.u32 q5, [r0, #240] +; CHECK-NEXT: vmov q6, q4 +; CHECK-NEXT: vldrw.u32 q3, [r0, #192] +; CHECK-NEXT: vldrw.u32 q1, [r0, #64] ; CHECK-NEXT: vstmia sp, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill ; CHECK-NEXT: vldrw.u32 q2, [r0, #160] +; CHECK-NEXT: vldrw.u32 q4, [r0, #48] +; CHECK-NEXT: add r2, sp, #128 +; CHECK-NEXT: vmov q7, q5 ; CHECK-NEXT: vldrw.u32 q3, [r0, #224] ; CHECK-NEXT: vldrw.u32 q1, [r0, #96] +; CHECK-NEXT: vldrw.u32 q5, [r0, #112] ; CHECK-NEXT: vstmia r2, {d8, d9, d10, d11, d12, d13, d14, d15} @ 64-byte Spill ; CHECK-NEXT: vmov q6, q2 -; CHECK-NEXT: vmov q7, 
q3 ; CHECK-NEXT: vmov q5, q1 +; CHECK-NEXT: vmov q7, q3 ; CHECK-NEXT: vldmia sp, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Reload ; CHECK-NEXT: add r2, sp, #64 ; CHECK-NEXT: vldrw.u32 q4, [r0, #32] @@ -922,39 +912,29 @@ ; CHECK-NEXT: sub sp, #192 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16] ; CHECK-NEXT: add r2, sp, #64 -; CHECK-NEXT: vldrw.u32 q1, [r0, #80] +; CHECK-NEXT: vldrw.u32 q4, [r0, #176] ; CHECK-NEXT: vldrw.u32 q3, [r0, #208] ; CHECK-NEXT: vldrw.u32 q2, [r0, #144] +; CHECK-NEXT: vldrw.u32 q1, [r0, #80] ; CHECK-NEXT: vstmia r2, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill -; CHECK-NEXT: vldrw.u32 q1, [r0, #64] -; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vldrw.u32 q3, [r0, #192] -; CHECK-NEXT: vldrw.u32 q2, [r0, #128] -; CHECK-NEXT: vldrw.u32 q4, [r0, #240] -; CHECK-NEXT: vstmia r2, {d2, d3, d4, d5, d6, d7, d8, d9} @ 64-byte Spill -; CHECK-NEXT: add r2, sp, #128 ; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldmia r2, {d8, d9, d10, d11, d12, d13, d14, d15} @ 64-byte Reload -; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vldrw.u32 q6, [r0, #176] -; CHECK-NEXT: vstmia r2, {d8, d9, d10, d11, d12, d13, d14, d15} @ 64-byte Spill -; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vldmia r2, {d8, d9, d10, d11, d12, d13, d14, d15} @ 64-byte Reload -; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vldrw.u32 q5, [r0, #112] -; CHECK-NEXT: vstmia r2, {d8, d9, d10, d11, d12, d13, d14, d15} @ 64-byte Spill -; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vldmia r2, {d8, d9, d10, d11, d12, d13, d14, d15} @ 64-byte Reload -; CHECK-NEXT: add r2, sp, #128 -; CHECK-NEXT: vldrw.u32 q4, [r0, #48] +; CHECK-NEXT: vldrw.u32 q2, [r0, #128] +; CHECK-NEXT: vldrw.u32 q5, [r0, #240] +; CHECK-NEXT: vmov q6, q4 +; CHECK-NEXT: vldrw.u32 q3, [r0, #192] +; CHECK-NEXT: vldrw.u32 q1, [r0, #64] ; CHECK-NEXT: vstmia sp, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Spill ; CHECK-NEXT: vldrw.u32 q2, [r0, #160] +; CHECK-NEXT: vldrw.u32 q4, [r0, #48] +; CHECK-NEXT: add r2, sp, #128 +; CHECK-NEXT: vmov q7, 
q5 ; CHECK-NEXT: vldrw.u32 q3, [r0, #224] ; CHECK-NEXT: vldrw.u32 q1, [r0, #96] +; CHECK-NEXT: vldrw.u32 q5, [r0, #112] ; CHECK-NEXT: vstmia r2, {d8, d9, d10, d11, d12, d13, d14, d15} @ 64-byte Spill ; CHECK-NEXT: vmov q6, q2 -; CHECK-NEXT: vmov q7, q3 ; CHECK-NEXT: vmov q5, q1 +; CHECK-NEXT: vmov q7, q3 ; CHECK-NEXT: vldmia sp, {d0, d1, d2, d3, d4, d5, d6, d7} @ 64-byte Reload ; CHECK-NEXT: add r2, sp, #64 ; CHECK-NEXT: vldrw.u32 q4, [r0, #32]