diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -946,6 +946,10 @@
   MachineBasicBlock *
   legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
 
+  /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
+  /// was moved to VGPR. \returns true if succeeded.
+  bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
+
   /// Replace this instruction's opcode with the equivalent VALU
   /// opcode. This function will also move the users of \p MI to the
   /// VALU if necessary. If present, \p MDT is updated.
@@ -1198,9 +1202,16 @@
   LLVM_READONLY
   int getSOPKOp(uint16_t Opcode);
 
+  /// \returns SADDR form of a FLAT Global instruction given an \p Opcode
+  /// of a VADDR form.
   LLVM_READONLY
   int getGlobalSaddrOp(uint16_t Opcode);
 
+  /// \returns VADDR form of a FLAT Global instruction given an \p Opcode
+  /// of a SADDR form.
+  LLVM_READONLY
+  int getGlobalVaddrOp(uint16_t Opcode);
+
   LLVM_READONLY
   int getVCMPXNoSDstOp(uint16_t Opcode);
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5012,6 +5012,92 @@
   }
 }
 
+// Rewrite \p Inst in place from its SADDR form to the matching VADDR form,
+// reusing the register held in saddr (which is no longer an SGPR) as the new
+// vaddr. This is only done when the old vaddr is known to be zero.
+// \returns true if \p Inst was rewritten; on false \p Inst is left untouched.
+bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
+  unsigned Opc = Inst.getOpcode();
+  int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
+  if (OldSAddrIdx < 0)
+    return false;
+
+  assert(isSegmentSpecificFLAT(Inst));
+
+  int NewOpc = AMDGPU::getGlobalVaddrOp(Opc);
+  if (NewOpc < 0)
+    return false;
+
+  // Nothing to do while the address still lives in SGPRs.
+  MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
+  MachineOperand &SAddr = Inst.getOperand(OldSAddrIdx);
+  if (RI.isSGPRReg(MRI, SAddr.getReg()))
+    return false;
+
+  int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
+  if (NewVAddrIdx < 0)
+    return false;
+
+  // Bail out instead of only asserting: without assertions a missing vaddr
+  // would index the operand list with -1 below.
+  int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
+  if (OldVAddrIdx < 0)
+    return false;
+
+  // Dropping the old vaddr is only valid if it is known to be zero, i.e. it is
+  // uniquely defined by a V_MOV_B32_e32 of immediate 0.
+  MachineOperand &VAddr = Inst.getOperand(OldVAddrIdx);
+  MachineInstr *VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg());
+  if (!VAddrDef || VAddrDef->getOpcode() != AMDGPU::V_MOV_B32_e32 ||
+      !VAddrDef->getOperand(1).isImm() || VAddrDef->getOperand(1).getImm() != 0)
+    return false;
+
+  const MCInstrDesc &NewDesc = get(NewOpc);
+  Inst.setDesc(NewDesc);
+
+  // Callers expect iterator to be valid after this call, so modify the
+  // instruction in place.
+  if (OldVAddrIdx == NewVAddrIdx) {
+    MachineOperand &NewVAddr = Inst.getOperand(NewVAddrIdx);
+    // Clear use list from the old vaddr holding a zero register.
+    MRI.removeRegOperandFromUseList(&NewVAddr);
+    MRI.moveOperands(&NewVAddr, &SAddr, 1);
+    Inst.RemoveOperand(OldSAddrIdx);
+    // Update the use list with the pointer we have just moved from the saddr
+    // to the vaddr position. Otherwise new vaddr will be missing from the use
+    // list.
+    MRI.removeRegOperandFromUseList(&NewVAddr);
+    MRI.addRegOperandToUseList(&NewVAddr);
+  } else {
+    assert(OldSAddrIdx == NewVAddrIdx);
+
+    // RemoveOperand does not fix up the indexes of tied operands that follow
+    // the removed one (and asserts on them), so untie a tied vdst_in first and
+    // re-tie it at its new position afterwards.
+    int NewVDstInIdx =
+        AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
+    if (NewVDstInIdx >= 0) {
+      int OldVDstInIdx =
+          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
+      assert(OldVDstInIdx >= 0);
+      Inst.untieRegOperand(OldVDstInIdx);
+    }
+
+    Inst.RemoveOperand(OldVAddrIdx);
+
+    if (NewVDstInIdx >= 0) {
+      int NewVDstIdx =
+          AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
+      Inst.tieOperands(NewVDstIdx, NewVDstInIdx);
+    }
+  }
+
+  // The zero materialization is dead once this was its last non-debug use.
+  if (MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg()))
+    VAddrDef->eraseFromParent();
+
+  return true;
+}
+
 // FIXME: Remove this when SelectionDAG is obsoleted.
 void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
                                        MachineInstr &MI) const {
@@ -5024,6 +5110,9 @@
   if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg())))
     return;
 
+  if (moveFlatAddrToVGPR(MI))
+    return;
+
   Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI);
   SAddr->setReg(ToSGPR);
 }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2505,6 +2505,15 @@
   let ValueCols = [["1"]];
 }
 
+// Maps a GLOBAL SADDR to its VADDR form.
+def getGlobalVaddrOp : InstrMapping { + let FilterClass = "GlobalSaddrTable"; + let RowFields = ["SaddrOp"]; + let ColFields = ["IsSaddr"]; + let KeyCol = ["1"]; + let ValueCols = [["0"]]; +} + // Maps a v_cmpx opcode with sdst to opcode without sdst. def getVCMPXNoSDstOp : InstrMapping { let FilterClass = "VCMPXNoSDstTable"; diff --git a/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll b/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s + +; The first load produces an address in a VGPR which is used in the address +; calculation of the second load (the one inside the loop). The value is uniform +; and the inner load is correctly selected to use the SADDR form; however, the +; address is promoted to vector registers because it all starts with a VGPR +; produced by the entry block load. +; +; Check that we change the SADDR form of the load to VADDR and do not have to use +; readfirstlane instructions to move the address from VGPRs into SGPRs. 
+ +; GCN-LABEL: {{^}}test_move_load_address_to_vgpr: +; GCN: BB{{[0-9]+}}_1: +; GCN-NOT: v_readfirstlane_b32 +; GCN: global_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}], off glc +define amdgpu_kernel void @test_move_load_address_to_vgpr(i32 addrspace(1)* nocapture %arg) { +bb: + %i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 0 + %i2 = load volatile i32, i32 addrspace(1)* %i1, align 4 + br label %bb3 + +bb2: ; preds = %bb3 + ret void + +bb3: ; preds = %bb3, %bb + %i = phi i32 [ %i2, %bb ], [ %i8, %bb3 ] + %i4 = zext i32 %i to i64 + %i5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %i4 + %i6 = load volatile i32, i32 addrspace(1)* %i5, align 4 + %i8 = add nuw nsw i32 %i, 1 + %i9 = icmp eq i32 %i8, 256 + br i1 %i9, label %bb2, label %bb3 +} diff --git a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir @@ -0,0 +1,413 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s + +--- +name: global_load_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: global_load_saddr_to_valu + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1 + ; GCN: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PHI]], 0, 0, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; 
GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE]], implicit-def $vcc, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc + ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1 + %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + +--- +name: global_load_saddr_to_valu_non_zero_vaddr +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: global_load_saddr_to_valu_non_zero_vaddr + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec + ; GCN: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GCN: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN: 
[[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE1]], implicit-def $vcc, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc + ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1 + %3:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + %4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + + +--- +name: global_load_saddr_to_valu_undef_vaddr +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: global_load_saddr_to_valu_undef_vaddr + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1 + ; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec + ; GCN: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GCN: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], undef %4:vgpr_32, 0, 0, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN: 
[[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE1]], implicit-def $vcc, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc + ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1 + %4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, undef %3:vgpr_32, 0, 0, implicit $exec + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + +--- +name: global_store_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: global_store_saddr_to_valu + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: GLOBAL_STORE_DWORD [[PHI]], [[DEF]], 0, 0, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE]], implicit-def $vcc, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc 
+ ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1 + %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %4:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD_SADDR %3, %4, %1, 0, 0, implicit $exec + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + +--- +name: global_addtid_load_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: global_addtid_load_saddr_to_valu + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1 + ; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec + ; GCN: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GCN: [[GLOBAL_LOAD_DWORD_ADDTID_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_ADDTID_SADDR [[REG_SEQUENCE]], 0, 0, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE1]], implicit-def $vcc, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def 
$scc + ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1 + %4:vgpr_32 = GLOBAL_LOAD_DWORD_ADDTID_SADDR %1, 0, 0, implicit $exec + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + +--- +name: global_store_addtid_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: global_store_addtid_saddr_to_valu + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec + ; GCN: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GCN: GLOBAL_STORE_DWORD_ADDTID_SADDR [[DEF]], [[REG_SEQUENCE]], 0, 0, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE1]], implicit-def $vcc, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec + ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc + ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; 
GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1 + %4:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD_ADDTID_SADDR %4, %1, 0, 0, implicit $exec + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + +--- +name: global_atomic_noret_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: global_atomic_noret_saddr_to_valu + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GCN: GLOBAL_ATOMIC_ADD [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE]], implicit-def $vcc, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc + ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1 + %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + GLOBAL_ATOMIC_ADD_SADDR %3, %3, %1, 0, 0, implicit $exec + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 
%bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + +--- +name: global_atomic_rtn_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: global_atomic_rtn_saddr_to_valu + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GCN: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1 + ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec + ; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE]], implicit-def $vcc, implicit $exec + ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec + ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc + ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0_vgpr1 + %0:sreg_64 = COPY $vgpr0_vgpr1 + + bb.1: + %1:sreg_64 = PHI %0, %bb.0, %2, %bb.1 + %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %4:vgpr_32 = GLOBAL_ATOMIC_ADD_SADDR_RTN %3, %3, %1, 0, 0, implicit $exec + %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc + S_CMP_LG_U64 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... 
+ +--- +name: scratch_load_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: scratch_load_saddr_to_valu + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1 + ; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]], implicit $exec + ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[V_READFIRSTLANE_B32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec + ; GCN: V_CMP_NE_U32_e32 0, [[V_AND_B32_e64_]], implicit-def $vcc, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec + ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc + ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0 + %0:sgpr_32 = COPY $vgpr0 + + bb.1: + %1:sgpr_32 = PHI %0, %bb.0, %2, %bb.1 + %4:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %1, 0, 0, implicit $exec, implicit $flat_scr + %2:sgpr_32 = S_AND_B32 %1, 1, implicit-def $scc + S_CMP_LG_U32 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... 
+ +--- +name: scratch_store_saddr_to_valu +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: scratch_store_saddr_to_valu + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]], implicit $exec + ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], [[V_READFIRSTLANE_B32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec + ; GCN: V_CMP_NE_U32_e32 0, [[V_AND_B32_e64_]], implicit-def $vcc, implicit $exec + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec + ; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc + ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; GCN: bb.2: + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $vgpr0 + %0:sgpr_32 = COPY $vgpr0 + + bb.1: + %1:sgpr_32 = PHI %0, %bb.0, %2, %bb.1 + %4:vgpr_32 = IMPLICIT_DEF + SCRATCH_STORE_DWORD_SADDR %4, %1, 0, 0, implicit $exec, implicit $flat_scr + %2:sgpr_32 = S_AND_B32 %1, 1, implicit-def $scc + S_CMP_LG_U32 %2, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +...