Index: include/llvm/CodeGen/LiveInterval.h =================================================================== --- include/llvm/CodeGen/LiveInterval.h +++ include/llvm/CodeGen/LiveInterval.h @@ -780,6 +780,13 @@ const MachineRegisterInfo &MRI, const SlotIndexes &Indexes) const; + /// For a given lane mask \p LaneMask, return true if no def of this register + /// writes any lane in \p LaneMask. Sub-register defs that do not overlap + /// the mask and full-register IMPLICIT_DEFs are ignored. \p Indexes is unused. + bool isSubRangeUndefined(LaneBitmask LaneMask, + const MachineRegisterInfo &MRI, + const SlotIndexes &Indexes) const; + /// Refines the subranges to support \p LaneMask. This may only be called /// for LI.hasSubrange()==true. Subregister ranges are split or created /// until \p LaneMask can be matched exactly. \p Mod is executed on the Index: lib/CodeGen/LiveInterval.cpp =================================================================== --- lib/CodeGen/LiveInterval.cpp +++ lib/CodeGen/LiveInterval.cpp @@ -941,6 +941,28 @@ } } +bool LiveInterval::isSubRangeUndefined(LaneBitmask LaneMask, + const MachineRegisterInfo &MRI, + const SlotIndexes &Indexes) const { + assert(TargetRegisterInfo::isVirtualRegister(reg)); + assert((MRI.getMaxLaneMaskForVReg(reg) & LaneMask).any() && + "LaneMask must overlap the lanes of the register"); + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + for (const MachineOperand &MO : MRI.def_operands(reg)) { + unsigned SubReg = MO.getSubReg(); + if (SubReg == 0) { // No sub-register index: a def of the whole register + const MachineInstr &MI = *MO.getParent(); + if (MI.isImplicitDef()) // IMPLICIT_DEF is not a true def, so ignore it + continue; + return false; + } + LaneBitmask DefMask = TRI.getSubRegIndexLaneMask(SubReg); + if ((DefMask & LaneMask).any()) // Sub-register def overlapping LaneMask + return false; + } + return true; // Found no true def for the lanes in LaneMask +} + raw_ostream& llvm::operator<<(raw_ostream& OS, const LiveRange::Segment &S) { return OS << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')'; } Index: 
lib/CodeGen/SplitKit.cpp =================================================================== --- lib/CodeGen/SplitKit.cpp +++ lib/CodeGen/SplitKit.cpp @@ -1386,6 +1386,13 @@ // %1 = COPY %0 if (S.empty()) continue; + // Check a similar situation where there are + // potentially multiple paths to the use, but all are only partial + // defs that don't define the subrange lane, or are + // IMPLICIT_DEFs - this is a corner case extension to the previous case + if (LI.isSubRangeUndefined(S.LaneMask, MRI, *LIS.getSlotIndexes())) + continue; + SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); SmallVector Undefs; Index: test/CodeGen/AMDGPU/subreg-split-live-in-error.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/subreg-split-live-in-error.mir @@ -0,0 +1,575 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+vgpr_spilling -amdgpu-vgpr-index-mode -run-pass=greedy -stress-regalloc=16 -o - %s | FileCheck -check-prefixes=GCN %s + +# This test is for a bug where the following happens: +# An interval (for a register with sub-registers) is split (as a prelude to +# a spill insertion). +# The 2 new intervals (as a result of the split) have sub-registers and hence sub-ranges +# and one of the sub-ranges is never properly defined or used (just an interval of the form +# [1234r,1234d:0) for an IMPLICIT_DEF). +# +# The insertion of the copy for the split is in a BB with more than one predecessor, in one of which +# there is a partial def of the properly defined sub-register. 
This partial def previously marked the +# other sub-register as undef, but with the addition of the copy for the split this is no longer +# strictly valid and this previously asserted due to not all predecessors having a live-out of the +# sub-register used in the copy +# +# A simplified form of this can be illustrated as +# +# bb.1: +# %0:vreg_64 = IMPLICIT_DEF +# … +# S_CBRANCH_SCC1 %bb.2, implicit $vcc +# S_BRANCH %bb.3 +# +# bb.2: +# ; predecessors: %bb.1, %bb.4 +# dead %1:vreg_64 = COPY %0:vreg_64 ; This is the point of the inserted split +# … +# S_BRANCH %bb.5 +# +# bb.3: +# ; predecessors: %bb.1 +# undef %0.sub0:vreg_64 = COPY %123:sreg_32 ; undef point for %0.sub1 +# … +# S_BRANCH %bb.4 +# +# bb.4: +# ; predecessors: %bb.3 +# … +# S_BRANCH %bb.2 +# +# This test exposes this scenario, which previously caused an assert +... +--- +name: _amdgpu_ps_main +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +registers: + - { id: 0, class: vreg_128, preferred-register: '' } + - { id: 1, class: vreg_128, preferred-register: '' } + - { id: 2, class: vreg_128, preferred-register: '' } + - { id: 3, class: vreg_128, preferred-register: '' } + - { id: 4, class: vreg_128, preferred-register: '' } + - { id: 5, class: vreg_1, preferred-register: '' } + - { id: 6, class: vreg_1, preferred-register: '' } + - { id: 7, class: vreg_1, preferred-register: '' } + - { id: 8, class: vreg_128, preferred-register: '' } + - { id: 9, class: vreg_128, preferred-register: '' } + - { id: 10, class: vreg_1, preferred-register: '' } + - { id: 11, class: vreg_1, preferred-register: '' } + - { id: 12, class: vgpr_32, preferred-register: '' } + - { id: 13, class: vreg_64, preferred-register: '' } + - { id: 14, class: sreg_64, preferred-register: '' } + - { id: 15, class: vreg_128, preferred-register: '' } + - { id: 16, class: vreg_128, preferred-register: '' } + - { id: 17, class: vreg_1, 
preferred-register: '' } + - { id: 18, class: vreg_128, preferred-register: '' } + - { id: 19, class: vreg_128, preferred-register: '' } + - { id: 20, class: vgpr_32, preferred-register: '' } + - { id: 21, class: vreg_128, preferred-register: '' } + - { id: 22, class: vreg_128, preferred-register: '' } + - { id: 23, class: vgpr_32, preferred-register: '' } + - { id: 24, class: vgpr_32, preferred-register: '' } + - { id: 25, class: sgpr_32, preferred-register: '' } + - { id: 26, class: sgpr_32, preferred-register: '' } + - { id: 27, class: sgpr_32, preferred-register: '' } + - { id: 28, class: sgpr_32, preferred-register: '' } + - { id: 29, class: sgpr_32, preferred-register: '' } + - { id: 30, class: sgpr_32, preferred-register: '' } + - { id: 31, class: vgpr_32, preferred-register: '' } + - { id: 32, class: vgpr_32, preferred-register: '' } + - { id: 33, class: vgpr_32, preferred-register: '' } + - { id: 34, class: vgpr_32, preferred-register: '' } + - { id: 35, class: sgpr_32, preferred-register: '' } + - { id: 36, class: vreg_128, preferred-register: '' } + - { id: 37, class: sreg_32, preferred-register: '' } + - { id: 38, class: vreg_128, preferred-register: '' } + - { id: 39, class: vreg_128, preferred-register: '' } + - { id: 40, class: sreg_32, preferred-register: '' } + - { id: 41, class: sreg_128, preferred-register: '' } + - { id: 42, class: vgpr_32, preferred-register: '' } + - { id: 43, class: vgpr_32, preferred-register: '' } + - { id: 44, class: vreg_128, preferred-register: '' } + - { id: 45, class: vgpr_32, preferred-register: '' } + - { id: 46, class: vreg_128, preferred-register: '' } + - { id: 47, class: vgpr_32, preferred-register: '' } + - { id: 48, class: vgpr_32, preferred-register: '' } + - { id: 49, class: sreg_128, preferred-register: '' } + - { id: 50, class: vgpr_32, preferred-register: '' } + - { id: 51, class: vgpr_32, preferred-register: '' } + - { id: 52, class: vgpr_32, preferred-register: '' } + - { id: 53, class: vgpr_32, 
preferred-register: '' } + - { id: 54, class: vgpr_32, preferred-register: '' } + - { id: 55, class: vgpr_32, preferred-register: '' } + - { id: 56, class: vgpr_32, preferred-register: '' } + - { id: 57, class: vgpr_32, preferred-register: '' } + - { id: 58, class: sreg_128, preferred-register: '' } + - { id: 59, class: vreg_64, preferred-register: '' } + - { id: 60, class: vreg_64, preferred-register: '' } + - { id: 61, class: vgpr_32, preferred-register: '' } + - { id: 62, class: vreg_64, preferred-register: '' } + - { id: 63, class: sreg_32_xm0, preferred-register: '' } + - { id: 64, class: sreg_256, preferred-register: '' } + - { id: 65, class: sreg_128, preferred-register: '' } + - { id: 66, class: sreg_64, preferred-register: '' } + - { id: 67, class: sreg_64, preferred-register: '' } + - { id: 68, class: vreg_128, preferred-register: '' } + - { id: 69, class: sreg_32, preferred-register: '' } + - { id: 70, class: sreg_128, preferred-register: '' } + - { id: 71, class: vgpr_32, preferred-register: '' } + - { id: 72, class: vgpr_32, preferred-register: '' } + - { id: 73, class: vreg_128, preferred-register: '' } + - { id: 74, class: sreg_32, preferred-register: '' } + - { id: 75, class: sreg_128, preferred-register: '' } + - { id: 76, class: vgpr_32, preferred-register: '' } + - { id: 77, class: vgpr_32, preferred-register: '' } + - { id: 78, class: vgpr_32, preferred-register: '' } + - { id: 79, class: sreg_64, preferred-register: '' } + - { id: 80, class: vgpr_32, preferred-register: '' } + - { id: 81, class: sreg_64, preferred-register: '' } + - { id: 82, class: sreg_64, preferred-register: '' } + - { id: 83, class: sreg_64, preferred-register: '' } + - { id: 84, class: sreg_64, preferred-register: '$vcc' } + - { id: 85, class: vreg_128, preferred-register: '' } + - { id: 86, class: sreg_64, preferred-register: '' } + - { id: 87, class: sreg_32, preferred-register: '' } + - { id: 88, class: sreg_128, preferred-register: '' } + - { id: 89, class: vgpr_32, 
preferred-register: '' } + - { id: 90, class: vgpr_32, preferred-register: '' } + - { id: 91, class: vreg_128, preferred-register: '' } + - { id: 92, class: vreg_128, preferred-register: '' } + - { id: 93, class: vgpr_32, preferred-register: '' } + - { id: 94, class: vgpr_32, preferred-register: '' } + - { id: 95, class: vgpr_32, preferred-register: '' } + - { id: 96, class: vgpr_32, preferred-register: '' } + - { id: 97, class: vgpr_32, preferred-register: '' } + - { id: 98, class: sreg_32, preferred-register: '' } + - { id: 99, class: vgpr_32, preferred-register: '' } + - { id: 100, class: vgpr_32, preferred-register: '' } + - { id: 101, class: vgpr_32, preferred-register: '' } + - { id: 102, class: vgpr_32, preferred-register: '' } + - { id: 103, class: vgpr_32, preferred-register: '' } + - { id: 104, class: sreg_128, preferred-register: '' } + - { id: 105, class: sreg_128, preferred-register: '' } + - { id: 106, class: vgpr_32, preferred-register: '' } + - { id: 107, class: vgpr_32, preferred-register: '' } + - { id: 108, class: vgpr_32, preferred-register: '' } + - { id: 109, class: vgpr_32, preferred-register: '' } + - { id: 110, class: vgpr_32, preferred-register: '' } + - { id: 111, class: vgpr_32, preferred-register: '' } + - { id: 112, class: sreg_128, preferred-register: '' } + - { id: 113, class: sreg_128, preferred-register: '' } + - { id: 114, class: sgpr_32, preferred-register: '' } + - { id: 115, class: vgpr_32, preferred-register: '' } + - { id: 116, class: vgpr_32, preferred-register: '' } + - { id: 117, class: vgpr_32, preferred-register: '' } + - { id: 118, class: vgpr_32, preferred-register: '' } + - { id: 119, class: vgpr_32, preferred-register: '' } + - { id: 120, class: vgpr_32, preferred-register: '' } + - { id: 121, class: vgpr_32, preferred-register: '' } + - { id: 122, class: vgpr_32, preferred-register: '' } + - { id: 123, class: vgpr_32, preferred-register: '' } + - { id: 124, class: vgpr_32, preferred-register: '' } + - { id: 125, 
class: vgpr_32, preferred-register: '' } + - { id: 126, class: sreg_64, preferred-register: '' } + - { id: 127, class: vreg_128, preferred-register: '' } + - { id: 128, class: vreg_128, preferred-register: '' } + - { id: 129, class: vgpr_32, preferred-register: '' } + - { id: 130, class: sreg_64_xexec, preferred-register: '$vcc' } + - { id: 131, class: sreg_32_xm0, preferred-register: '' } + - { id: 132, class: sreg_32, preferred-register: '' } + - { id: 133, class: vgpr_32, preferred-register: '' } + - { id: 134, class: vgpr_32, preferred-register: '' } + - { id: 135, class: vgpr_32, preferred-register: '' } + - { id: 136, class: vgpr_32, preferred-register: '' } + - { id: 137, class: vgpr_32, preferred-register: '' } + - { id: 138, class: sreg_64, preferred-register: '$vcc' } + - { id: 139, class: sreg_64, preferred-register: '' } + - { id: 140, class: vgpr_32, preferred-register: '' } + - { id: 141, class: sreg_64, preferred-register: '$vcc' } + - { id: 142, class: sreg_64, preferred-register: '' } + - { id: 143, class: sreg_64, preferred-register: '' } + - { id: 144, class: sreg_64, preferred-register: '' } + - { id: 145, class: sreg_64, preferred-register: '' } + - { id: 146, class: vreg_128, preferred-register: '' } + - { id: 147, class: vreg_128, preferred-register: '' } + - { id: 148, class: vgpr_32, preferred-register: '' } + - { id: 149, class: vgpr_32, preferred-register: '' } + - { id: 150, class: vgpr_32, preferred-register: '' } + - { id: 151, class: vgpr_32, preferred-register: '' } + - { id: 152, class: vgpr_32, preferred-register: '' } + - { id: 153, class: vgpr_32, preferred-register: '' } + - { id: 154, class: vgpr_32, preferred-register: '' } + - { id: 155, class: vgpr_32, preferred-register: '' } + - { id: 156, class: vgpr_32, preferred-register: '' } + - { id: 157, class: vgpr_32, preferred-register: '' } + - { id: 158, class: vgpr_32, preferred-register: '' } + - { id: 159, class: vgpr_32, preferred-register: '' } + - { id: 160, class: 
vgpr_32, preferred-register: '' } + - { id: 161, class: vgpr_32, preferred-register: '' } + - { id: 162, class: sreg_64, preferred-register: '$vcc' } + - { id: 163, class: sreg_64, preferred-register: '' } + - { id: 164, class: vreg_128, preferred-register: '' } + - { id: 165, class: vreg_128, preferred-register: '' } + - { id: 166, class: sreg_64, preferred-register: '' } + - { id: 167, class: vreg_128, preferred-register: '' } + - { id: 168, class: vreg_128, preferred-register: '' } + - { id: 169, class: vgpr_32, preferred-register: '' } + - { id: 170, class: sreg_64, preferred-register: '' } + - { id: 171, class: vreg_128, preferred-register: '' } + - { id: 172, class: vreg_128, preferred-register: '' } + - { id: 173, class: sreg_64, preferred-register: '' } + - { id: 174, class: sreg_64, preferred-register: '$vcc' } + - { id: 175, class: vreg_128, preferred-register: '' } + - { id: 176, class: sreg_32, preferred-register: '' } + - { id: 177, class: vgpr_32, preferred-register: '' } + - { id: 178, class: vgpr_32, preferred-register: '' } + - { id: 179, class: vgpr_32, preferred-register: '' } + - { id: 180, class: vgpr_32, preferred-register: '' } + - { id: 181, class: vreg_64, preferred-register: '' } + - { id: 182, class: vreg_64, preferred-register: '' } + - { id: 183, class: vreg_64, preferred-register: '' } + - { id: 184, class: sreg_32_xm0, preferred-register: '' } + - { id: 185, class: sreg_256, preferred-register: '' } + - { id: 186, class: vgpr_32, preferred-register: '' } + - { id: 187, class: vgpr_32, preferred-register: '' } + - { id: 188, class: vgpr_32, preferred-register: '' } + - { id: 189, class: vgpr_32, preferred-register: '' } + - { id: 190, class: vgpr_32, preferred-register: '' } + - { id: 191, class: vgpr_32, preferred-register: '' } + - { id: 192, class: vgpr_32, preferred-register: '' } + - { id: 193, class: vgpr_32, preferred-register: '' } + - { id: 194, class: vgpr_32, preferred-register: '' } + - { id: 195, class: vgpr_32, 
preferred-register: '' } + - { id: 196, class: vgpr_32, preferred-register: '' } + - { id: 197, class: vgpr_32, preferred-register: '' } + - { id: 198, class: vgpr_32, preferred-register: '' } + - { id: 199, class: vgpr_32, preferred-register: '' } + - { id: 200, class: vgpr_32, preferred-register: '' } + - { id: 201, class: vgpr_32, preferred-register: '' } + - { id: 202, class: vgpr_32, preferred-register: '' } + - { id: 203, class: vgpr_32, preferred-register: '' } + - { id: 204, class: vgpr_32, preferred-register: '' } + - { id: 205, class: vgpr_32, preferred-register: '' } + - { id: 206, class: vgpr_32, preferred-register: '' } + - { id: 207, class: vgpr_32, preferred-register: '' } + - { id: 208, class: vgpr_32, preferred-register: '' } + - { id: 209, class: vgpr_32, preferred-register: '' } + - { id: 210, class: vgpr_32, preferred-register: '' } + - { id: 211, class: vgpr_32, preferred-register: '' } + - { id: 212, class: vgpr_32, preferred-register: '' } + - { id: 213, class: vgpr_32, preferred-register: '' } + - { id: 214, class: vreg_128, preferred-register: '' } + - { id: 215, class: vgpr_32, preferred-register: '' } + - { id: 216, class: vgpr_32, preferred-register: '' } + - { id: 217, class: vgpr_32, preferred-register: '' } + - { id: 218, class: vreg_128, preferred-register: '' } + - { id: 219, class: vreg_128, preferred-register: '' } + - { id: 220, class: vreg_128, preferred-register: '' } + - { id: 221, class: vgpr_32, preferred-register: '' } + - { id: 222, class: vgpr_32, preferred-register: '' } + - { id: 223, class: vgpr_32, preferred-register: '' } + - { id: 224, class: vreg_128, preferred-register: '' } + - { id: 225, class: vgpr_32, preferred-register: '' } + - { id: 226, class: vgpr_32, preferred-register: '' } + - { id: 227, class: vgpr_32, preferred-register: '' } + - { id: 228, class: vreg_128, preferred-register: '' } + - { id: 229, class: vgpr_32, preferred-register: '' } + - { id: 230, class: vgpr_32, preferred-register: '' } + - { 
id: 231, class: vgpr_32, preferred-register: '' } + - { id: 232, class: vreg_64, preferred-register: '' } + - { id: 233, class: vreg_128, preferred-register: '' } + - { id: 234, class: vreg_128, preferred-register: '' } + - { id: 235, class: vreg_1, preferred-register: '' } + - { id: 236, class: vreg_1, preferred-register: '' } + - { id: 237, class: vgpr_32, preferred-register: '' } + - { id: 238, class: vreg_128, preferred-register: '' } + - { id: 239, class: vreg_128, preferred-register: '' } + - { id: 240, class: vreg_1, preferred-register: '' } + - { id: 241, class: vreg_1, preferred-register: '' } + - { id: 242, class: vgpr_32, preferred-register: '' } + - { id: 243, class: vgpr_32, preferred-register: '' } + - { id: 244, class: vreg_128, preferred-register: '' } + - { id: 245, class: vreg_128, preferred-register: '' } + - { id: 246, class: sreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr2', virtual-reg: '%31' } + - { reg: '$vgpr3', virtual-reg: '%32' } + - { reg: '$vgpr4', virtual-reg: '%33' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $vgpr2, $vgpr3, $vgpr4 + + %33:vgpr_32 = COPY $vgpr4 + %32:vgpr_32 = COPY $vgpr3 + %31:vgpr_32 = COPY $vgpr2 + S_CBRANCH_SCC0 %bb.2, implicit undef $scc + + bb.1: + successors: %bb.5(0x80000000) + + undef %233.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec + %233.sub1:vreg_128 = COPY %233.sub0 + %233.sub2:vreg_128 = COPY %233.sub0 + S_BRANCH %bb.5 + + bb.2: + successors: %bb.3(0x40000000), %bb.4(0x40000000) + + S_CBRANCH_SCC0 %bb.4, implicit undef $scc 
+ + bb.3: + successors: %bb.5(0x80000000) + + undef %233.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec + %233.sub1:vreg_128 = COPY %233.sub0 + S_BRANCH %bb.5 + + bb.4: + successors: %bb.5(0x80000000) + + %233:vreg_128 = IMPLICIT_DEF + + bb.5: + successors: %bb.6(0x40000000), %bb.22(0x40000000) + + %243:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + S_CBRANCH_SCC1 %bb.22, implicit undef $scc + S_BRANCH %bb.6 + + bb.6: + successors: %bb.8(0x40000000), %bb.11(0x40000000) + + %242:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + dead %53:vgpr_32 = V_MUL_F32_e32 0, undef %54:vgpr_32, implicit $exec + dead %55:vgpr_32 = V_MUL_F32_e32 0, %33, implicit $exec + undef %62.sub1:vreg_64 = V_MUL_F32_e32 0, %32, implicit $exec + undef %219.sub0:vreg_128 = V_MUL_F32_e32 0, %31, implicit $exec + undef %64.sub0:sreg_256 = S_MOV_B32 0 + %64.sub1:sreg_256 = COPY %64.sub0 + %64.sub2:sreg_256 = COPY %64.sub0 + %64.sub3:sreg_256 = COPY %64.sub0 + %64.sub4:sreg_256 = COPY %64.sub0 + %64.sub5:sreg_256 = COPY %64.sub0 + %64.sub6:sreg_256 = COPY %64.sub0 + %64.sub7:sreg_256 = COPY %64.sub0 + %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %62, %64, undef %65:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) + %237:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + %244:vreg_128 = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit undef $scc + S_BRANCH %bb.11 + + bb.7: + successors: %bb.13(0x80000000) + + undef %244.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec + %244.sub1:vreg_128 = COPY %244.sub0 + %244.sub2:vreg_128 = COPY %244.sub0 + %242:vgpr_32 = IMPLICIT_DEF + S_BRANCH %bb.13 + + bb.8: + successors: %bb.9(0x40000000), %bb.10(0x40000000) + + S_CBRANCH_SCC0 %bb.10, implicit undef $scc + + bb.9: + successors: %bb.12(0x80000000) + + undef %244.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec + %244.sub1:vreg_128 = COPY %244.sub0 + %244.sub2:vreg_128 = COPY %244.sub0 + S_BRANCH %bb.12 + + bb.10: + successors: %bb.12(0x80000000) + + undef 
%244.sub0:vreg_128 = V_MOV_B32_e32 2143289344, implicit $exec + %244.sub1:vreg_128 = COPY %244.sub0 + %244.sub2:vreg_128 = COPY %244.sub0 + S_BRANCH %bb.12 + + bb.11: + successors: %bb.7(0x40000000), %bb.13(0x40000000) + + %84:sreg_64 = V_CMP_NE_U32_e64 0, %237, implicit $exec + %83:sreg_64 = S_AND_B64 $exec, %84, implicit-def dead $scc + $vcc = COPY %83 + S_CBRANCH_VCCNZ %bb.7, implicit $vcc + S_BRANCH %bb.13 + + bb.12: + successors: %bb.11(0x80000000) + + %237:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %242:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + S_BRANCH %bb.11 + + bb.13: + successors: %bb.15(0x40000000), %bb.14(0x40000000) + + ; In reality we are checking that this code doesn't assert when splitting and inserting a spill + ; Here we just check that the point where the error occurs we see a correctly generated spill + ; GCN-LABEL: bb.13: + ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec + + %97:vgpr_32 = V_MAD_F32 0, %219.sub0, 0, target-flags(amdgpu-gotprel) 1073741824, 0, -1082130432, 0, 0, implicit $exec + %99:vgpr_32 = V_MAD_F32 0, %3.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $exec + %104:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %105:sreg_128, 1040, 0 :: (dereferenceable invariant load 16) + %101:vgpr_32 = V_ADD_F32_e32 0, %99, implicit $exec + %102:vgpr_32 = V_MAD_F32 0, %97, 0, 0, 0, 0, 0, 0, implicit $exec + %107:vgpr_32 = COPY %104.sub3 + %109:vgpr_32 = V_MUL_F32_e64 0, target-flags(amdgpu-gotprel32-lo) 0, 0, %104.sub1, 0, 0, implicit $exec + %112:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %113:sreg_128, 1056, 0 :: (dereferenceable invariant load 16) + %115:vgpr_32 = V_MAD_F32 0, %97, 0, %112.sub0, 0, 0, 0, 0, implicit $exec + %12:vgpr_32 = V_ADD_F32_e32 %115, %99, implicit $exec + %117:vgpr_32 = V_RCP_F32_e32 %12, implicit $exec + %109:vgpr_32 = V_MAC_F32_e32 0, %97, %109, implicit $exec + %110:vgpr_32 = V_MAD_F32 0, target-flags(amdgpu-gotprel) 0, 0, %3.sub0, 0, %107, 0, 
0, implicit $exec + %111:vgpr_32 = V_ADD_F32_e32 %109, %110, implicit $exec + %118:vgpr_32 = V_MUL_F32_e32 %101, %117, implicit $exec + %119:vgpr_32 = V_MUL_F32_e32 %102, %117, implicit $exec + %120:vgpr_32 = V_MUL_F32_e32 %111, %117, implicit $exec + %122:vgpr_32 = V_MUL_F32_e32 0, %119, implicit $exec + %122:vgpr_32 = V_MAC_F32_e32 0, %118, %122, implicit $exec + %123:vgpr_32 = V_MAD_F32 0, %120, 0, 0, 0, 0, 0, 0, implicit $exec + %130:sreg_64_xexec = V_CMP_NE_U32_e64 0, %242, implicit $exec + %129:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %130, implicit $exec + V_CMP_NE_U32_e32 1, %129, implicit-def $vcc, implicit $exec + $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc + %125:vgpr_32 = V_ADD_F32_e32 %122, %123, implicit $exec + S_CBRANCH_VCCZ %bb.15, implicit $vcc + + bb.14: + successors: %bb.17(0x80000000) + + S_BRANCH %bb.17 + + bb.15: + successors: %bb.16(0x40000000), %bb.18(0x40000000) + + %136:vgpr_32 = V_MAD_F32 0, %125, 0, 0, 0, 0, 0, 0, implicit $exec + %138:sreg_64 = V_CMP_LE_F32_e64 0, 0, 0, %136, 0, implicit $exec + %141:sreg_64 = V_CMP_GE_F32_e64 0, 1065353216, 0, %136, 0, implicit $exec + %143:sreg_64 = S_AND_B64 %141, %141, implicit-def dead $scc + %144:sreg_64 = S_AND_B64 %138, %138, implicit-def dead $scc + %145:sreg_64 = S_AND_B64 %144, %143, implicit-def dead $scc + %14:sreg_64 = COPY $exec, implicit-def $exec + %246:sreg_64 = S_AND_B64 %14, %145, implicit-def dead $scc + $exec = S_MOV_B64_term %246 + SI_MASK_BRANCH %bb.18, implicit $exec + S_BRANCH %bb.16 + + bb.16: + successors: %bb.18(0x80000000) + + S_BRANCH %bb.18 + + bb.17: + successors: %bb.21(0x40000000), %bb.23(0x40000000) + + %174:sreg_64 = V_CMP_NE_U32_e64 0, %242, implicit $exec + %173:sreg_64 = S_AND_B64 $exec, %174, implicit-def dead $scc + %245:vreg_128 = IMPLICIT_DEF + $vcc = COPY %173 + S_CBRANCH_VCCNZ %bb.21, implicit $vcc + S_BRANCH %bb.23 + + bb.18: + successors: %bb.20(0x40000000), %bb.19(0x40000000) + + $exec = S_OR_B64 $exec, %14, implicit-def $scc + %153:vgpr_32 = V_MAD_F32 
0, %233.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 1, %233.sub0, 0, 0, implicit $exec + %157:vgpr_32 = V_MUL_F32_e32 -2147483648, %233.sub1, implicit $exec + %157:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel32-hi) 1065353216, %233.sub2, %157, implicit $exec + %159:vgpr_32 = V_MUL_F32_e32 %157, %157, implicit $exec + %159:vgpr_32 = V_MAC_F32_e32 %153, %153, %159, implicit $exec + %160:vgpr_32 = V_SQRT_F32_e32 %159, implicit $exec + %242:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %161:vgpr_32 = V_MOV_B32_e32 981668463, implicit $exec + %162:sreg_64 = V_CMP_NGT_F32_e64 0, %160, 0, %161, 0, implicit $exec + %166:sreg_64 = S_AND_B64 $exec, %162, implicit-def dead $scc + $vcc = COPY %166 + S_CBRANCH_VCCZ %bb.20, implicit $vcc + + bb.19: + successors: %bb.17(0x80000000) + + S_BRANCH %bb.17 + + bb.20: + successors: %bb.17(0x80000000) + + S_BRANCH %bb.17 + + bb.21: + successors: %bb.23(0x80000000) + + %176:sreg_32 = S_MOV_B32 0 + undef %245.sub0:vreg_128 = COPY %176 + S_BRANCH %bb.23 + + bb.22: + successors: %bb.24(0x80000000) + + S_BRANCH %bb.24 + + bb.23: + successors: %bb.22(0x80000000) + + undef %183.sub1:vreg_64 = V_CVT_I32_F32_e32 %32, implicit $exec + %183.sub0:vreg_64 = V_CVT_I32_F32_e32 %31, implicit $exec + undef %185.sub0:sreg_256 = S_MOV_B32 0 + %185.sub1:sreg_256 = COPY %185.sub0 + %185.sub2:sreg_256 = COPY %185.sub0 + %185.sub3:sreg_256 = COPY %185.sub0 + %185.sub4:sreg_256 = COPY %185.sub0 + %185.sub5:sreg_256 = COPY %185.sub0 + %185.sub6:sreg_256 = COPY %185.sub0 + %185.sub7:sreg_256 = COPY %185.sub0 + %193:vgpr_32 = V_MOV_B32_e32 1033100696, implicit $exec + %202:vgpr_32 = V_MUL_F32_e32 1060575065, %244.sub1, implicit $exec + %202:vgpr_32 = V_MAC_F32_e32 1046066128, %244.sub0, %202, implicit $exec + %190:vgpr_32 = IMAGE_LOAD_V1_V2 %183, %185, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) + %190:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel) 0, %245.sub0, %190, implicit $exec + 
%191:vgpr_32 = V_MUL_F32_e32 0, %190, implicit $exec + %192:vgpr_32 = V_MUL_F32_e32 0, %191, implicit $exec + %194:vgpr_32 = V_MAD_F32 0, %192, 0, %193, 0, 0, 0, 0, implicit $exec + %202:vgpr_32 = V_MAC_F32_e32 %244.sub2, %193, %202, implicit $exec + %243:vgpr_32 = V_ADD_F32_e32 %202, %194, implicit $exec + S_BRANCH %bb.22 + + bb.24: + %204:vgpr_32 = V_MUL_F32_e32 0, %243, implicit $exec + %205:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, undef %206:vgpr_32, 0, %204, 0, implicit $exec + EXP 0, undef %207:vgpr_32, %205, undef %208:vgpr_32, undef %209:vgpr_32, -1, -1, 15, implicit $exec + S_ENDPGM + +...