diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -103,6 +103,10 @@
   const TargetRegisterClass *getPointerRegClass(
     const MachineFunction &MF, unsigned Kind = 0) const override;
 
+  void readlaneIntoExec(MachineBasicBlock::iterator MI, Register ExecReg,
+                        Register VGPR, int Lane, bool KillVGPR, bool DefineExec,
+                        RegScavenger *RS) const;
+
   void buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI, int Index,
                                int Offset, unsigned EltSize, Register VGPR,
                                int64_t VGPRLanes, RegScavenger *RS,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -868,6 +868,79 @@
   }
 }
 
+// VGPR lanes cannot be read directly into EXEC registers.
+// This function generates moves via a temporary SGPR or VGPR.
+void SIRegisterInfo::readlaneIntoExec(MachineBasicBlock::iterator MI,
+                                      Register ExecReg, Register VGPR, int Lane,
+                                      bool KillVGPR, bool DefineExec,
+                                      RegScavenger *RS) const {
+  MachineBasicBlock *MBB = MI->getParent();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const DebugLoc &DL = MI->getDebugLoc();
+
+  Register TmpSGPR =
+      RS->scavengeRegister(&AMDGPU::SReg_32_XM0_XEXECRegClass, MI, 0, false);
+  MachineInstrBuilder MIB;
+
+  if (TmpSGPR) {
+    BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+            TmpSGPR)
+        .addReg(VGPR, getKillRegState(KillVGPR))
+        .addImm(Lane);
+    MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ExecReg)
+              .addReg(TmpSGPR, RegState::Kill);
+  } else {
+    // No SGPRs are available, so we will need to borrow one and put it back
+    // when we are done. Use SGPR0 for this purpose.
+    TmpSGPR = AMDGPU::SGPR0;
+
+    Register TmpVGPR =
+        RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false);
+    assert(TmpVGPR != VGPR);
+    if (TmpVGPR) {
+      // Use a VGPR to hold the TmpSGPR value while reading the exec lane.
+      BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
+              TmpVGPR)
+          .addReg(TmpSGPR)
+          .addImm(0)
+          .addReg(TmpVGPR, RegState::Undef);
+      BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+              TmpSGPR)
+          .addReg(VGPR, getKillRegState(KillVGPR))
+          .addImm(Lane);
+      MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ExecReg)
+                .addReg(TmpSGPR);
+      BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+              TmpSGPR)
+          .addReg(TmpVGPR, RegState::Kill)
+          .addImm(0);
+    } else {
+      // As EXEC is being overwritten, we can use EXEC to temporarily hold
+      // the value of the register we borrow.
+      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ExecReg)
+          .addReg(TmpSGPR);
+      BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+              TmpSGPR)
+          .addReg(VGPR, getKillRegState(KillVGPR))
+          .addImm(Lane);
+
+      // Swap the registers using the XOR swap algorithm.
+      // FIXME: this unavoidably clobbers SCC
+      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_XOR_B32), ExecReg)
+          .addReg(TmpSGPR)
+          .addReg(ExecReg);
+      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_XOR_B32), TmpSGPR)
+          .addReg(TmpSGPR)
+          .addReg(ExecReg);
+      MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_XOR_B32), ExecReg)
+                .addReg(TmpSGPR)
+                .addReg(ExecReg);
+    }
+  }
+  if (DefineExec)
+    MIB.addReg(AMDGPU::EXEC, RegState::ImplicitDefine);
+}
+
 // Generate a VMEM access which loads or stores the VGPR containing an SGPR
 // spill such that all the lanes set in VGPRLanes are loaded or stored.
 // This generates exec mask manipulation and will use SGPRs available in MI
@@ -887,63 +960,40 @@
   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
-  unsigned FirstPart = isWave32 ? Offset * 16 : Offset * 32;
+  unsigned FirstPart = Offset * 32;
+  unsigned ExecLane = 0;
   bool IsKill = MI->getOperand(0).isKill();
   const DebugLoc &DL = MI->getDebugLoc();
 
+  // If SuperReg is EXEC_HI then it is intentionally ignored here.
   const bool SuperRegIsExec =
       SuperReg == AMDGPU::EXEC || SuperReg == AMDGPU::EXEC_LO;
 
-  // If exec mask is stored in the VGPR, make sure it is stored after
-  // any lanes used by the spill (16 lanes on Wave32, 32 lanes on Wave64).
-  const unsigned ExecLoLane = SuperRegIsExec ? 0 : (isWave32 ? 16 : 32);
-  const unsigned ExecHiLane = SuperRegIsExec ? 1 : (isWave32 ? 17 : 33);
-
-  // Try to use the src/dst SGPRs to hold a copy of the exec mask.
-  // Use VGPR lanes when this is not possible, i.e. the src value
-  // must be valid after the spill or src is smaller than exec mask.
-  bool StoreExecInVGPR = !IsLoad && (SuperRegIsExec || !IsKill);
-
   // On Wave32 only handle EXEC_LO.
   // On Wave64 only update EXEC_HI if there is sufficient space for a copy.
-  bool OnlyExecLo = isWave32 || NumSubRegs == 1;
+  bool OnlyExecLo = isWave32 || NumSubRegs == 1 || SuperReg == AMDGPU::EXEC_HI;
 
   unsigned ExecMovOpc = OnlyExecLo ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
   Register ExecReg = OnlyExecLo ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
   Register SavedExecReg;
 
-  // Backup EXEC
-  if (SuperRegIsExec) {
-    // Do nothing; exec is already stored in VGPR or will be overwritten
-  } else if (StoreExecInVGPR) {
-    BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
-            VGPR)
-        .addReg(AMDGPU::EXEC_LO)
-        .addImm(ExecLoLane)
-        .addReg(VGPR, getUndefRegState(IsLoad));
-
-    if (!isWave32) {
-      BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
-              VGPR)
-          .addReg(AMDGPU::EXEC_HI)
-          .addImm(ExecHiLane)
-          .addReg(VGPR);
-    }
-  } else {
+  // Back up EXEC unless it is already stored in the VGPR or will be
+  // overwritten.
+  if (!SuperRegIsExec) {
     if (OnlyExecLo) {
-      SavedExecReg = NumSubRegs == 1
-                         ? SuperReg
-                         : getSubReg(SuperReg, SplitParts[FirstPart]);
-    } else {
       SavedExecReg =
-          getMatchingSuperReg(getSubReg(SuperReg, SplitParts[FirstPart]),
-                              AMDGPU::sub0, &AMDGPU::SReg_64_XEXECRegClass);
+          NumSubRegs == 1
+              ? SuperReg
+              : getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]);
+    } else {
       // If src/dst is an odd size it is possible subreg0 is not aligned.
-      if (!SavedExecReg && NumSubRegs > 2)
-        SavedExecReg =
-            getMatchingSuperReg(getSubReg(SuperReg, SplitParts[FirstPart + 1]),
-                                AMDGPU::sub0, &AMDGPU::SReg_64_XEXECRegClass);
+      for (; ExecLane < (NumSubRegs - 1); ++ExecLane) {
+        SavedExecReg = getMatchingSuperReg(
+            getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]), AMDGPU::sub0,
+            &AMDGPU::SReg_64_XEXECRegClass);
+        if (SavedExecReg)
+          break;
+      }
    }
    assert(SavedExecReg);
@@ -976,34 +1026,48 @@
                            Offset * EltSize, MMO, RS);
   } else {
-    buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
-                        Index,
-                        VGPR, !StoreExecInVGPR,
-                        MFI->getScratchRSrcReg(), FrameReg,
-                        Offset * EltSize, MMO,
-                        RS);
+    buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET, Index, VGPR,
+                        !SuperRegIsExec && IsKill, MFI->getScratchRSrcReg(),
+                        FrameReg, Offset * EltSize, MMO, RS);
 
     // This only ever adds one VGPR spill
     MFI->addToSpilledVGPRs(1);
   }
 
-  // Restore EXEC
-  if (SuperRegIsExec && IsLoad) {
-    // Do nothing; exec will be overwritten
-  } else if (StoreExecInVGPR) {
-    BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
-            AMDGPU::EXEC_LO)
-        .addReg(VGPR, getKillRegState(!IsLoad && isWave32))
-        .addImm(ExecLoLane);
-    if (!isWave32) {
-      BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
-              AMDGPU::EXEC_HI)
-          .addReg(VGPR, getKillRegState(!IsLoad))
-          .addImm(ExecHiLane);
-    }
-  } else {
+  // Restore EXEC (if required)
+  if (!SuperRegIsExec) {
+    bool KillSavedExec =
+        (IsLoad || IsKill) &&
+        !(SavedExecReg == AMDGPU::EXEC_LO || SavedExecReg == AMDGPU::EXEC_HI);
     assert(SavedExecReg);
     BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg)
-        .addReg(SavedExecReg, RegState::Kill);
+        .addReg(SavedExecReg, getKillRegState(KillSavedExec));
+  }
+
+  // Restore clobbered SGPRs
+  if (IsLoad) {
+    // Nothing to do; register will be overwritten
+  } else if (SuperRegIsExec || SuperReg == AMDGPU::EXEC_HI) {
+    // EXEC is always live
+    if (SuperReg == AMDGPU::EXEC) {
+      readlaneIntoExec(MI, AMDGPU::EXEC_LO, VGPR, 0, false, true, RS);
+      readlaneIntoExec(MI, AMDGPU::EXEC_HI, VGPR, 1, true, false, RS);
+    } else {
+      readlaneIntoExec(MI, SuperReg, VGPR, 0, true, false, RS);
+    }
+  } else if (!IsKill) {
+    // Restore SGPRs from appropriate VGPR lanes
+    if (!OnlyExecLo) {
+      BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+              getSubReg(SuperReg, SplitParts[FirstPart + ExecLane + 1]))
+          .addReg(VGPR)
+          .addImm(ExecLane + 1);
+    }
+    BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+            NumSubRegs == 1
+                ? SavedExecReg
+                : getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]))
+        .addReg(VGPR, RegState::Kill)
+        .addImm(ExecLane);
   }
 }
@@ -1069,11 +1133,12 @@
   // Scavenged temporary VGPR to use. It must be scavenged once for any number
   // of spilled subregs.
   Register TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+  RS->setRegUsed(TmpVGPR);
 
   // SubReg carries the "Kill" flag when SubReg == SuperReg.
   unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
 
-  unsigned PerVGPR = isWave32 ? 16 : 32;
+  unsigned PerVGPR = 32;
   unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR;
   int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL;
@@ -1150,21 +1215,27 @@
     for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
       Register SubReg = NumSubRegs == 1 ?
          SuperReg : getSubReg(SuperReg, SplitParts[i]);
+      bool DefineSuperReg = NumSubRegs > 1 && i == 0;
       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
-      auto MIB =
-          BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
-                  SubReg)
-              .addReg(Spill.VGPR)
-              .addImm(Spill.Lane);
-
-      if (NumSubRegs > 1 && i == 0)
-        MIB.addReg(SuperReg, RegState::ImplicitDefine);
+      if (SubReg == AMDGPU::EXEC_LO || SubReg == AMDGPU::EXEC_HI) {
+        readlaneIntoExec(MI, SubReg, Spill.VGPR, Spill.Lane, false,
+                         DefineSuperReg, RS);
+      } else {
+        auto MIB =
+            BuildMI(*MBB, MI, DL,
+                    TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg)
+                .addReg(Spill.VGPR)
+                .addImm(Spill.Lane);
+        if (DefineSuperReg)
+          MIB.addReg(SuperReg, RegState::ImplicitDefine);
+      }
     }
   } else {
     Register TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+    RS->setRegUsed(TmpVGPR);
 
-    unsigned PerVGPR = isWave32 ? 16 : 32;
+    unsigned PerVGPR = 32;
     unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR;
     int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL;
@@ -1179,16 +1250,22 @@
          i < e; ++i) {
       Register SubReg = NumSubRegs == 1 ?
           SuperReg : getSubReg(SuperReg, SplitParts[i]);
       bool LastSubReg = (i + 1 == e);
-      auto MIB =
-          BuildMI(*MBB, MI, DL,
-                  TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg)
-              .addReg(TmpVGPR, getKillRegState(LastSubReg))
-              .addImm(i);
+      bool DefineSuperReg = NumSubRegs > 1 && i == 0;
 
-      if (NumSubRegs > 1 && i == 0)
-        MIB.addReg(SuperReg, RegState::ImplicitDefine);
+      if (SubReg == AMDGPU::EXEC_LO || SubReg == AMDGPU::EXEC_HI) {
+        readlaneIntoExec(MI, SubReg, TmpVGPR, i, LastSubReg, DefineSuperReg,
+                         RS);
+      } else {
+        auto MIB = BuildMI(*MBB, MI, DL,
+                           TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+                           SubReg)
+                       .addReg(TmpVGPR, getKillRegState(LastSubReg))
+                       .addImm(i);
+        if (DefineSuperReg)
+          MIB.addReg(SuperReg, RegState::ImplicitDefine);
+      }
     }
   }
 }
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
@@ -8,15 +8,15 @@
 # CHECK: V_WRITELANE
 # CHECK: $sgpr12 = S_MOV_B32 $exec_lo
 # CHECK: $exec_lo = S_MOV_B32 1
-# CHECK: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
+# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
 # CHECK: $exec_lo = S_MOV_B32 killed $sgpr12
 
 # S32 without kill
 # CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
+# CHECK: $sgpr12 = S_MOV_B32 $exec_lo
 # CHECK: $exec_lo = S_MOV_B32 1
-# CHECK: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
+# CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
+# CHECK: $sgpr12 = V_READLANE
 
 # S64 with kill
 # CHECK: V_WRITELANE
@@ -25,20 +25,22 @@
 # GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
 # GCN32: $exec_lo = S_MOV_B32 3
 # GCN64: $exec = S_MOV_B64 3
-# CHECK: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
+# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
 # GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
 # GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
 
 # S64 without kill
 # CHECK: V_WRITELANE
 # CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# GCN64: V_WRITELANE
+# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
+# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
 # GCN32: $exec_lo = S_MOV_B32 3
 # GCN64: $exec = S_MOV_B64 3
-# CHECK: BUFFER_STORE_DWORD_OFFSET
{{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8 -# CHECK: $exec_lo = V_READLANE -# GCN64: $exec_hi = V_READLANE +# CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8 +# GCN32: $exec_lo = S_MOV_B32 $sgpr12 +# GCN64: $exec = S_MOV_B64 $sgpr12_sgpr13 +# GCN64: $sgpr13 = V_READLANE +# CHECK: $sgpr12 = V_READLANE # S96 # CHECK: V_WRITELANE @@ -48,7 +50,7 @@ # GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec # GCN32: $exec_lo = S_MOV_B32 7 # GCN64: $exec = S_MOV_B64 7 -# CHECK: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 16 +# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 16 # GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 # GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 @@ -61,7 +63,7 @@ # GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec # GCN32: $exec_lo = S_MOV_B32 15 # GCN64: $exec = S_MOV_B64 15 -# CHECK: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 28 +# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 28 # GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 # GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13 @@ -138,10 +140,6 @@ # CHECK: V_WRITELANE # CHECK: V_WRITELANE # CHECK: V_WRITELANE -# GCN32: $sgpr64 = S_MOV_B32 $exec_lo -# GCN32: $exec_lo = S_MOV_B32 65535 -# GCN32: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 160 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64 # CHECK: V_WRITELANE # CHECK: V_WRITELANE # CHECK: V_WRITELANE @@ -158,13 +156,12 @@ # CHECK: V_WRITELANE # CHECK: V_WRITELANE # CHECK: V_WRITELANE -# GCN32: $sgpr80 = S_MOV_B32 $exec_lo +# GCN32: $sgpr64 = S_MOV_B32 $exec_lo # GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 65535 +# GCN32: $exec_lo = S_MOV_B32 4294967295 # GCN64: $exec = S_MOV_B64 4294967295 -# GCN32: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 164 -# GCN64: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 160 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr80 +# CHECK: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 160 +# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64 # GCN64: $exec = S_MOV_B64 killed $sgpr64_sgpr65 --- | @@ -350,7 +347,7 @@ # S1024 # GCN32: $sgpr64 = S_MOV_B32 $exec_lo # GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec -# GCN32: $exec_lo = S_MOV_B32 65535 +# GCN32: $exec_lo = S_MOV_B32 4294967295 # GCN64: $exec = S_MOV_B64 4294967295 # CHECK: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 160 # GCN32: $exec_lo = S_MOV_B32 killed $sgpr64 @@ -371,10 +368,6 @@ # CHECK: $sgpr77 = V_READLANE # CHECK: $sgpr78 = V_READLANE # CHECK: $sgpr79 = V_READLANE -# GCN32: $sgpr80 = S_MOV_B32 $exec_lo -# GCN32: $exec_lo = S_MOV_B32 65535 -# GCN32: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 164 -# GCN32: $exec_lo = S_MOV_B32 killed $sgpr80 # CHECK: $sgpr80 = V_READLANE # CHECK: $sgpr81 = V_READLANE # CHECK: $sgpr82 = V_READLANE diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir --- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir @@ -11,6 +11,14 @@ ret void } + define amdgpu_kernel void @check_exec_nosgprs() #0 { + ret void + } + + define amdgpu_kernel void @check_exec_nosgprs_novgprs() #0 { + ret void + } + attributes #0 = { "frame-pointer"="all" } ... 
--- @@ -53,12 +61,12 @@ ; GFX9: $vcc = IMPLICIT_DEF ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_lo, 0, undef $vgpr0, implicit $vcc ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_hi, 1, $vgpr0, implicit $vcc - ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_lo, 32, $vgpr0 - ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_hi, 33, $vgpr0 + ; GFX9: $vcc = S_MOV_B64 $exec ; GFX9: $exec = S_MOV_B64 3 ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX9: $exec_lo = V_READLANE_B32_vi $vgpr0, 32 - ; GFX9: $exec_hi = V_READLANE_B32_vi killed $vgpr0, 33 + ; GFX9: $exec = S_MOV_B64 $vcc + ; GFX9: $vcc_hi = V_READLANE_B32_vi $vgpr0, 1 + ; GFX9: $vcc_lo = V_READLANE_B32_vi killed $vgpr0, 0 ; GFX9: $vcc = IMPLICIT_DEF ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_lo, 0, undef $vgpr0, implicit $vcc ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $vcc_hi, 1, $vgpr0, implicit killed $vcc @@ -83,12 +91,12 @@ ; GFX10: $vcc = IMPLICIT_DEF ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_lo, 0, undef $vgpr0, implicit $vcc ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_hi, 1, $vgpr0, implicit $vcc - ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_lo, 32, $vgpr0 - ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_hi, 33, $vgpr0 + ; GFX10: $vcc = S_MOV_B64 $exec ; GFX10: $exec = S_MOV_B64 3 ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX10: $exec_lo = V_READLANE_B32_gfx10 $vgpr0, 32 - ; GFX10: $exec_hi = V_READLANE_B32_gfx10 killed $vgpr0, 33 + ; GFX10: $exec = S_MOV_B64 $vcc + ; GFX10: $vcc_hi = V_READLANE_B32_gfx10 $vgpr0, 1 + ; GFX10: $vcc_lo = V_READLANE_B32_gfx10 killed $vgpr0, 0 ; GFX10: $vcc = IMPLICIT_DEF ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_lo, 0, undef $vgpr0, implicit $vcc ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $vcc_hi, 1, $vgpr0, implicit killed $vcc @@ -121,6 +129,7 @@ maxAlignment: 4 stack: - { id: 0, type: spill-slot, size: 8, alignment: 4 } + - { id: 1, type: spill-slot, size: 4, alignment: 4 } machineFunctionInfo: isEntryFunction: true waveLimiter: true @@ -135,7 +144,7 @@ privateSegmentWaveByteOffset: { reg: '$sgpr9' } body: | bb.0: - liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7 + liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $vcc ; CHECK-LABEL: name: check_exec ; CHECK: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9 @@ -151,12 +160,38 @@ ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_hi, 1, $vgpr0, implicit $exec ; GFX9: $exec = S_MOV_B64 3 ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX9: $exec_lo = V_READLANE_B32_vi $vgpr0, 0 - ; GFX9: $exec_hi = V_READLANE_B32_vi killed $vgpr0, 1 + ; GFX9: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; GFX9: $exec_lo = S_MOV_B32 killed $sgpr0, implicit-def $exec + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 1 + ; GFX9: $exec_hi = S_MOV_B32 killed $sgpr0 ; GFX9: $exec = S_MOV_B64 3 ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) - ; GFX9: $exec_lo = V_READLANE_B32_vi $vgpr0, 0, implicit-def $exec - ; GFX9: $exec_hi = V_READLANE_B32_vi killed $vgpr0, 1 + ; GFX9: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; GFX9: $exec_lo = S_MOV_B32 killed $sgpr0, implicit-def $exec + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 1 + ; GFX9: $exec_hi = S_MOV_B32 killed $sgpr0 + ; 
GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_lo, 0, undef $vgpr0 + ; GFX9: $exec_lo = S_MOV_B32 1 + ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; GFX9: $exec_lo = S_MOV_B32 killed $sgpr0 + ; GFX9: $exec_lo = S_MOV_B32 1 + ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; GFX9: $exec_lo = S_MOV_B32 killed $sgpr0 + ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_hi, 0, undef $vgpr0 + ; GFX9: $exec_hi = S_MOV_B32 $exec_lo + ; GFX9: $exec_lo = S_MOV_B32 1 + ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX9: $exec_lo = S_MOV_B32 $exec_hi + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; GFX9: $exec_hi = S_MOV_B32 killed $sgpr0 + ; GFX9: $exec_hi = S_MOV_B32 $exec_lo + ; GFX9: $exec_lo = S_MOV_B32 1 + ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GFX9: $exec_lo = S_MOV_B32 $exec_hi + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; GFX9: $exec_hi = S_MOV_B32 killed $sgpr0 ; GFX10: $sgpr33 = S_MOV_B32 0 ; GFX10: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 @@ -169,13 +204,339 @@ ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_hi, 1, $vgpr0, implicit $exec ; GFX10: $exec = S_MOV_B64 3 ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX10: $exec_lo = V_READLANE_B32_gfx10 $vgpr0, 0 - ; GFX10: $exec_hi = V_READLANE_B32_gfx10 killed $vgpr0, 1 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 $vgpr0, 0 + ; GFX10: $exec_lo = S_MOV_B32 killed $sgpr0, implicit-def $exec + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 1 + ; GFX10: $exec_hi = S_MOV_B32 killed $sgpr0 ; GFX10: $exec = S_MOV_B64 3 ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) - ; GFX10: $exec_lo = V_READLANE_B32_gfx10 $vgpr0, 0, implicit-def $exec - ; GFX10: $exec_hi = V_READLANE_B32_gfx10 killed $vgpr0, 1 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 $vgpr0, 0 + ; GFX10: $exec_lo = S_MOV_B32 killed $sgpr0, implicit-def $exec + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 1 + ; GFX10: $exec_hi = S_MOV_B32 killed $sgpr0 + ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_lo, 0, undef $vgpr0 + ; GFX10: $exec_lo = S_MOV_B32 1 + ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 0 + ; GFX10: $exec_lo = S_MOV_B32 killed $sgpr0 + ; GFX10: $exec_lo = S_MOV_B32 1 + ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 0 + ; GFX10: $exec_lo = S_MOV_B32 killed $sgpr0 + ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_hi, 0, undef $vgpr0 + ; GFX10: $exec_hi = S_MOV_B32 $exec_lo + ; GFX10: $exec_lo = S_MOV_B32 1 + ; GFX10: BUFFER_STORE_DWORD_OFFSET 
$vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX10: $exec_lo = S_MOV_B32 $exec_hi + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 0 + ; GFX10: $exec_hi = S_MOV_B32 killed $sgpr0 + ; GFX10: $exec_hi = S_MOV_B32 $exec_lo + ; GFX10: $exec_lo = S_MOV_B32 1 + ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GFX10: $exec_lo = S_MOV_B32 $exec_hi + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 0 + ; GFX10: $exec_hi = S_MOV_B32 killed $sgpr0 SI_SPILL_S64_SAVE $exec, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 $exec = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + SI_SPILL_S32_SAVE $exec_lo, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + $exec_lo = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + SI_SPILL_S32_SAVE $exec_hi, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + $exec_hi = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 +... +--- +name: check_exec_nosgprs +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } + - { id: 1, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: + isEntryFunction: true + waveLimiter: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41_sgpr42_sgpr43, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91, $sgpr92_sgpr93_sgpr94_sgpr95, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr104_sgpr105, $vcc + + ; CHECK-LABEL: name: check_exec_nosgprs + ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41_sgpr42_sgpr43, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91, $sgpr92_sgpr93_sgpr94_sgpr95, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr104_sgpr105, $vcc + + ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_lo, 0, undef $vgpr0, implicit $exec + ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_hi, 1, $vgpr0, implicit $exec + ; GFX9: $exec = S_MOV_B64 3 + ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: 
(store 4 into %stack.0, addrspace 5) + ; GFX9: $vgpr1 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr1 + ; GFX9: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; GFX9: $exec_lo = S_MOV_B32 $sgpr0, implicit-def $exec + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr1, 0 + ; GFX9: $vgpr1 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr1 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 1 + ; GFX9: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr1, 0 + ; GFX9: $exec = S_MOV_B64 3 + ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX9: $vgpr1 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr1 + ; GFX9: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; GFX9: $exec_lo = S_MOV_B32 $sgpr0, implicit-def $exec + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr1, 0 + ; GFX9: $vgpr1 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr1 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 1 + ; GFX9: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr1, 0 + ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_lo, 0, undef $vgpr0 + ; GFX9: $exec_lo = S_MOV_B32 1 + ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX9: $vgpr1 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr1 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; GFX9: $exec_lo = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr1, 0 + ; GFX9: $exec_lo = S_MOV_B32 1 + ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GFX9: $vgpr1 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr1 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; GFX9: $exec_lo = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr1, 0 + ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_hi, 0, undef $vgpr0 + ; GFX9: $exec_hi = S_MOV_B32 $exec_lo + ; GFX9: $exec_lo = S_MOV_B32 1 + ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX9: $exec_lo = S_MOV_B32 $exec_hi + ; GFX9: $vgpr1 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr1 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; GFX9: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr1, 0 + ; GFX9: $exec_hi = S_MOV_B32 $exec_lo + ; GFX9: $exec_lo = S_MOV_B32 1 + ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GFX9: $exec_lo = S_MOV_B32 $exec_hi + ; GFX9: $vgpr1 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr1 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; GFX9: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr1, 0 + + ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_lo, 0, undef $vgpr0, implicit $exec + ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_hi, 1, $vgpr0, implicit $exec + ; GFX10: $exec = S_MOV_B64 3 + ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GFX10: $vgpr1 = V_WRITELANE_B32_gfx10 $sgpr0, 0, undef $vgpr1 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 $vgpr0, 0 + ; GFX10: $exec_lo = S_MOV_B32 $sgpr0, implicit-def $exec + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr1, 0 + ; GFX10: 
$vgpr1 = V_WRITELANE_B32_gfx10 $sgpr0, 0, undef $vgpr1 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 1 + ; GFX10: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr1, 0 + ; GFX10: $exec = S_MOV_B64 3 + ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX10: $vgpr1 = V_WRITELANE_B32_gfx10 $sgpr0, 0, undef $vgpr1 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 $vgpr0, 0 + ; GFX10: $exec_lo = S_MOV_B32 $sgpr0, implicit-def $exec + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr1, 0 + ; GFX10: $vgpr1 = V_WRITELANE_B32_gfx10 $sgpr0, 0, undef $vgpr1 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 1 + ; GFX10: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr1, 0 + ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_lo, 0, undef $vgpr0 + ; GFX10: $exec_lo = S_MOV_B32 1 + ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX10: $vgpr1 = V_WRITELANE_B32_gfx10 $sgpr0, 0, undef $vgpr1 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 0 + ; GFX10: $exec_lo = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr1, 0 + ; GFX10: $exec_lo = S_MOV_B32 1 + ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GFX10: $vgpr1 = V_WRITELANE_B32_gfx10 $sgpr0, 0, undef $vgpr1 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 0 + ; GFX10: $exec_lo = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr1, 0 + ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_hi, 0, undef $vgpr0 + ; GFX10: $exec_hi = S_MOV_B32 $exec_lo + ; GFX10: $exec_lo = S_MOV_B32 1 + ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX10: $exec_lo = S_MOV_B32 $exec_hi + ; GFX10: $vgpr1 = V_WRITELANE_B32_gfx10 $sgpr0, 0, undef $vgpr1 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 0 + ; GFX10: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr1, 0 + ; GFX10: $exec_hi = S_MOV_B32 $exec_lo + ; GFX10: $exec_lo = S_MOV_B32 1 + ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GFX10: $exec_lo = S_MOV_B32 $exec_hi + ; GFX10: $vgpr1 = V_WRITELANE_B32_gfx10 $sgpr0, 0, undef $vgpr1 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 0 + ; GFX10: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr1, 0 + SI_SPILL_S64_SAVE $exec, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + $exec = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + SI_SPILL_S32_SAVE $exec_lo, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + $exec_lo = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + SI_SPILL_S32_SAVE $exec_hi, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + $exec_hi = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 +... 
+--- +name: check_exec_nosgprs_novgprs +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } + - { id: 1, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: + isEntryFunction: true + waveLimiter: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41_sgpr42_sgpr43, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91, $sgpr92_sgpr93_sgpr94_sgpr95, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr104_sgpr105, $vcc, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr64, $vgpr65, $vgpr66, $vgpr67, $vgpr68, $vgpr69, $vgpr70, $vgpr71, $vgpr72, $vgpr73, $vgpr74, $vgpr75, $vgpr76, $vgpr77, $vgpr78, $vgpr79, $vgpr80, $vgpr81, $vgpr82, $vgpr83, $vgpr84, $vgpr85, $vgpr86, $vgpr87, $vgpr88, $vgpr89, $vgpr90, $vgpr91, $vgpr92, $vgpr93, $vgpr94, $vgpr95, $vgpr96, $vgpr97, $vgpr98, $vgpr99, $vgpr100, $vgpr101, $vgpr102, $vgpr103, $vgpr104, $vgpr105, $vgpr106, $vgpr107, $vgpr108, $vgpr109, $vgpr110, $vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127, $vgpr128, $vgpr129, $vgpr130, $vgpr131, $vgpr132, $vgpr133, $vgpr134, $vgpr135, $vgpr136, $vgpr137, $vgpr138, $vgpr139, $vgpr140, $vgpr141, $vgpr142, $vgpr143, $vgpr144, $vgpr145, $vgpr146, $vgpr147, $vgpr148, $vgpr149, $vgpr150, $vgpr151, $vgpr152, $vgpr153, $vgpr154, $vgpr155, $vgpr156, $vgpr157, $vgpr158, $vgpr159, $vgpr160, $vgpr161, $vgpr162, $vgpr163, $vgpr164, $vgpr165, $vgpr166, $vgpr167, $vgpr168, $vgpr169, $vgpr170, $vgpr171, $vgpr172, $vgpr173, $vgpr174, $vgpr175, $vgpr176, $vgpr177, $vgpr178, $vgpr179, $vgpr180, $vgpr181, $vgpr182, $vgpr183, $vgpr184, $vgpr185, $vgpr186, $vgpr187, $vgpr188, $vgpr189, $vgpr190, $vgpr191, $vgpr192, $vgpr193, $vgpr194, $vgpr195, $vgpr196, $vgpr197, $vgpr198, $vgpr199, $vgpr200, $vgpr201, $vgpr202, $vgpr203, $vgpr204, $vgpr205, $vgpr206, $vgpr207, $vgpr208, $vgpr209, $vgpr210, $vgpr211, $vgpr212, $vgpr213, $vgpr214, $vgpr215, $vgpr216, $vgpr217, $vgpr218, $vgpr219, $vgpr220, $vgpr221, $vgpr222, $vgpr223, $vgpr224, $vgpr225, $vgpr226, $vgpr227, $vgpr228, $vgpr229, $vgpr230, $vgpr231, $vgpr232, $vgpr233, $vgpr234, $vgpr235, $vgpr236, $vgpr237, $vgpr238, $vgpr239, $vgpr240, $vgpr241, $vgpr242, $vgpr243, $vgpr244, $vgpr245, $vgpr246, $vgpr247, $vgpr248, $vgpr249, $vgpr250, $vgpr251, $vgpr252, 
$vgpr253, $vgpr254, $vgpr255 + + ; CHECK-LABEL: name: check_exec_nosgprs_novgprs + ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41_sgpr42_sgpr43, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91, $sgpr92_sgpr93_sgpr94_sgpr95, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr104_sgpr105, $vcc, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr64, $vgpr65, $vgpr66, $vgpr67, $vgpr68, $vgpr69, $vgpr70, $vgpr71, $vgpr72, $vgpr73, $vgpr74, $vgpr75, $vgpr76, $vgpr77, $vgpr78, $vgpr79, $vgpr80, $vgpr81, $vgpr82, $vgpr83, $vgpr84, $vgpr85, $vgpr86, $vgpr87, $vgpr88, $vgpr89, $vgpr90, $vgpr91, $vgpr92, $vgpr93, $vgpr94, $vgpr95, $vgpr96, $vgpr97, $vgpr98, $vgpr99, $vgpr100, $vgpr101, $vgpr102, $vgpr103, $vgpr104, $vgpr105, $vgpr106, $vgpr107, $vgpr108, $vgpr109, $vgpr110, $vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127, $vgpr128, $vgpr129, $vgpr130, $vgpr131, $vgpr132, $vgpr133, $vgpr134, $vgpr135, $vgpr136, $vgpr137, $vgpr138, $vgpr139, $vgpr140, $vgpr141, $vgpr142, $vgpr143, $vgpr144, $vgpr145, $vgpr146, $vgpr147, $vgpr148, $vgpr149, $vgpr150, $vgpr151, $vgpr152, $vgpr153, $vgpr154, $vgpr155, $vgpr156, $vgpr157, $vgpr158, $vgpr159, $vgpr160, $vgpr161, $vgpr162, $vgpr163, $vgpr164, $vgpr165, $vgpr166, $vgpr167, $vgpr168, $vgpr169, $vgpr170, $vgpr171, $vgpr172, $vgpr173, $vgpr174, $vgpr175, $vgpr176, $vgpr177, $vgpr178, $vgpr179, $vgpr180, $vgpr181, $vgpr182, $vgpr183, $vgpr184, $vgpr185, $vgpr186, $vgpr187, $vgpr188, $vgpr189, $vgpr190, $vgpr191, $vgpr192, $vgpr193, $vgpr194, $vgpr195, $vgpr196, $vgpr197, $vgpr198, $vgpr199, $vgpr200, $vgpr201, $vgpr202, $vgpr203, $vgpr204, $vgpr205, $vgpr206, $vgpr207, $vgpr208, $vgpr209, $vgpr210, $vgpr211, $vgpr212, $vgpr213, $vgpr214, $vgpr215, $vgpr216, $vgpr217, $vgpr218, $vgpr219, $vgpr220, $vgpr221, $vgpr222, $vgpr223, $vgpr224, $vgpr225, $vgpr226, $vgpr227, $vgpr228, $vgpr229, $vgpr230, $vgpr231, $vgpr232, $vgpr233, $vgpr234, $vgpr235, $vgpr236, $vgpr237, $vgpr238, $vgpr239, $vgpr240, $vgpr241, $vgpr242, $vgpr243, $vgpr244, $vgpr245, $vgpr246, $vgpr247, $vgpr248, $vgpr249, $vgpr250, $vgpr251, $vgpr252, $vgpr253, $vgpr254, $vgpr255 + + ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_lo, 0, undef $vgpr0, implicit $exec + ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_hi, 1, $vgpr0, implicit $exec + ; GFX9: $exec = S_MOV_B64 3 + ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, 
implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GFX9: $exec_lo = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; GFX9: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX9: $sgpr0 = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX9: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc, implicit-def $exec + ; GFX9: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 1 + ; GFX9: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX9: $sgpr0 = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX9: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX9: $exec = S_MOV_B64 3 + ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX9: $exec_lo = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; GFX9: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX9: $sgpr0 = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX9: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc, implicit-def $exec + ; GFX9: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 1 + ; GFX9: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX9: $sgpr0 = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX9: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_lo, 0, undef $vgpr0 + ; GFX9: $exec_lo = S_MOV_B32 1 + ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX9: $exec_lo = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; GFX9: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX9: $sgpr0 = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX9: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX9: $exec_lo = S_MOV_B32 1 + ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GFX9: $exec_lo = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; GFX9: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX9: $sgpr0 = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX9: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX9: $vgpr0 = V_WRITELANE_B32_vi $exec_hi, 0, undef $vgpr0 + ; GFX9: $exec_hi = S_MOV_B32 $exec_lo + ; GFX9: $exec_lo = S_MOV_B32 1 + ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX9: $exec_lo = S_MOV_B32 $exec_hi + ; GFX9: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; GFX9: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX9: $sgpr0 = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX9: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX9: $exec_hi = S_MOV_B32 $exec_lo + ; GFX9: $exec_lo = S_MOV_B32 1 + ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GFX9: $exec_lo = S_MOV_B32 $exec_hi + ; GFX9: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX9: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; GFX9: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX9: $sgpr0 = S_XOR_B32 $sgpr0, $exec_hi, 
implicit-def $scc + ; GFX9: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + + ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_lo, 0, undef $vgpr0, implicit $exec + ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_hi, 1, $vgpr0, implicit $exec + ; GFX10: $exec = S_MOV_B64 3 + ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GFX10: $exec_lo = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 $vgpr0, 0 + ; GFX10: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX10: $sgpr0 = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX10: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc, implicit-def $exec + ; GFX10: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 1 + ; GFX10: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX10: $sgpr0 = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX10: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX10: $exec = S_MOV_B64 3 + ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX10: $exec_lo = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 $vgpr0, 0 + ; GFX10: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX10: $sgpr0 = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX10: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc, implicit-def $exec + ; GFX10: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 1 + ; GFX10: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX10: $sgpr0 = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX10: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_lo, 0, undef $vgpr0 + ; GFX10: $exec_lo = S_MOV_B32 1 + ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX10: $exec_lo = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 0 + ; GFX10: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX10: $sgpr0 = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX10: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX10: $exec_lo = S_MOV_B32 1 + ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GFX10: $exec_lo = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 0 + ; GFX10: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX10: $sgpr0 = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX10: $exec_lo = S_XOR_B32 $sgpr0, $exec_lo, implicit-def $scc + ; GFX10: $vgpr0 = V_WRITELANE_B32_gfx10 $exec_hi, 0, undef $vgpr0 + ; GFX10: $exec_hi = S_MOV_B32 $exec_lo + ; GFX10: $exec_lo = S_MOV_B32 1 + ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX10: $exec_lo = S_MOV_B32 $exec_hi + ; GFX10: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 0 + ; GFX10: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX10: $sgpr0 = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX10: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX10: 
$exec_hi = S_MOV_B32 $exec_lo + ; GFX10: $exec_lo = S_MOV_B32 1 + ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 12, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GFX10: $exec_lo = S_MOV_B32 $exec_hi + ; GFX10: $exec_hi = S_MOV_B32 $sgpr0 + ; GFX10: $sgpr0 = V_READLANE_B32_gfx10 killed $vgpr0, 0 + ; GFX10: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX10: $sgpr0 = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + ; GFX10: $exec_hi = S_XOR_B32 $sgpr0, $exec_hi, implicit-def $scc + SI_SPILL_S64_SAVE $exec, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + $exec = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + SI_SPILL_S32_SAVE $exec_lo, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + $exec_lo = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + SI_SPILL_S32_SAVE $exec_hi, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + + $exec_hi = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 ...
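
Reviewer note: the no-free-registers fallback in readlaneIntoExec relies on the classic three-XOR exchange, emitted as three S_XOR_B32 instructions. Below is a minimal standalone sketch of that identity, with plain uint32_t values standing in for EXEC and the borrowed SGPR0; the names and the main() harness are illustrative only and not part of the patch.

```cpp
#include <cassert>
#include <cstdint>

// Three-XOR exchange, as emitted by readlaneIntoExec when neither a spare
// SGPR nor a spare VGPR can be scavenged: EXEC (about to be redefined anyway)
// first parks the borrowed SGPR0 value, the spilled value is read into SGPR0,
// and the swap leaves the spilled value in EXEC while returning SGPR0's
// original contents. On target, each S_XOR_B32 also clobbers SCC (the FIXME).
static void xorSwap(uint32_t &A, uint32_t &B) {
  A ^= B; // A = A0 ^ B0
  B ^= A; // B = B0 ^ (A0 ^ B0) = A0
  A ^= B; // A = (A0 ^ B0) ^ A0 = B0
}

int main() {
  uint32_t Exec = 0x12345678;  // borrowed SGPR0 value parked in EXEC
  uint32_t Sgpr0 = 0xDEADBEEF; // spilled value just read from the VGPR lane
  xorSwap(Exec, Sgpr0);
  assert(Exec == 0xDEADBEEF && Sgpr0 == 0x12345678);
  return 0;
}
```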
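Similarly, the updated GCN32 expectations (the exec mask immediate growing from 65535 to 4294967295, and the second Wave32 store/load batch disappearing) fall directly out of PerVGPR changing from isWave32 ? 16 : 32 to a flat 32. A small reproduction of the mask arithmetic from buildSGPRSpillLoadStore, using only the expressions visible in the patch; the driver loop and printed values are illustrative.

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned PerVGPR = 32; // was isWave32 ? 16 : 32 before this patch
  for (unsigned NumSubRegs : {1u, 2u, 16u, 32u}) {
    // Lane mask and VGPR count exactly as computed in buildSGPRSpillLoadStore.
    int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL;
    unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR;
    // An S1024 spill (NumSubRegs == 32) now sets all 32 lanes in one batch
    // (4294967295), matching the new GCN32 CHECK lines; it previously took
    // two 16-lane batches with mask 65535 each.
    std::printf("NumSubRegs=%2u -> VGPRLanes=%lld NumVGPRs=%u\n", NumSubRegs,
                (long long)VGPRLanes, NumVGPRs);
  }
  return 0;
}
```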