diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -249,7 +249,6 @@ bool isCBranchSCC(const SDNode *N) const; void SelectBRCOND(SDNode *N); void SelectFMAD_FMA(SDNode *N); - void SelectATOMIC_CMP_SWAP(SDNode *N); void SelectDSAppendConsume(SDNode *N, unsigned IntrID); void SelectDS_GWS(SDNode *N, unsigned IntrID); void SelectInterpP1F16(SDNode *N); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -679,9 +679,6 @@ case ISD::FMA: SelectFMAD_FMA(N); return; - case AMDGPUISD::ATOMIC_CMP_SWAP: - SelectATOMIC_CMP_SWAP(N); - return; case AMDGPUISD::CVT_PKRTZ_F16_F32: case AMDGPUISD::CVT_PKNORM_I16_F32: case AMDGPUISD::CVT_PKNORM_U16_F32: @@ -2278,70 +2275,6 @@ } } -// This is here because there isn't a way to use the generated sub0_sub1 as the -// subreg index to EXTRACT_SUBREG in tablegen. -void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { - MemSDNode *Mem = cast<MemSDNode>(N); - unsigned AS = Mem->getAddressSpace(); - if (AS == AMDGPUAS::FLAT_ADDRESS) { - SelectCode(N); - return; - } - - MVT VT = N->getSimpleValueType(0); - bool Is32 = (VT == MVT::i32); - SDLoc SL(N); - - MachineSDNode *CmpSwap = nullptr; - if (Subtarget->hasAddr64()) { - SDValue SRsrc, VAddr, SOffset, Offset; - - if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset)) { - unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN : - AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN; - SDValue CmpVal = Mem->getOperand(2); - SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32); - - // XXX - Do we care about glue operands? 
- - SDValue Ops[] = {CmpVal, VAddr, SRsrc, SOffset, Offset, CPol, - Mem->getChain()}; - - CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); - } - } - - if (!CmpSwap) { - SDValue SRsrc, SOffset, Offset; - if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset)) { - unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN : - AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN; - - SDValue CmpVal = Mem->getOperand(2); - SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32); - SDValue Ops[] = {CmpVal, SRsrc, SOffset, Offset, CPol, Mem->getChain()}; - - CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); - } - } - - if (!CmpSwap) { - SelectCode(N); - return; - } - - MachineMemOperand *MMO = Mem->getMemOperand(); - CurDAG->setNodeMemRefs(CmpSwap, {MMO}); - - unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1; - SDValue Extract - = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0)); - - ReplaceUses(SDValue(N, 0), Extract); - ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1)); - CurDAG->RemoveDeadNode(N); -} - void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) { // The address is assumed to be uniform, so if it ends up in a VGPR, it will // be copied to an SGPR with readfirstlane. 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -133,7 +133,6 @@ void initM0(MachineInstr &I) const; bool selectG_LOAD_STORE_ATOMICRMW(MachineInstr &I) const; - bool selectG_AMDGPU_ATOMIC_CMPXCHG(MachineInstr &I) const; bool selectG_SELECT(MachineInstr &I) const; bool selectG_BRCOND(MachineInstr &I) const; bool selectG_GLOBAL_VALUE(MachineInstr &I) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2385,65 +2385,6 @@ return selectImpl(I, *CoverageInfo); } -// TODO: No rtn optimization. -bool AMDGPUInstructionSelector::selectG_AMDGPU_ATOMIC_CMPXCHG( - MachineInstr &MI) const { - Register PtrReg = MI.getOperand(1).getReg(); - const LLT PtrTy = MRI->getType(PtrReg); - if (PtrTy.getAddressSpace() == AMDGPUAS::FLAT_ADDRESS || - STI.useFlatForGlobal()) - return selectImpl(MI, *CoverageInfo); - - Register DstReg = MI.getOperand(0).getReg(); - const LLT Ty = MRI->getType(DstReg); - const bool Is64 = Ty.getSizeInBits() == 64; - const unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0; - Register TmpReg = MRI->createVirtualRegister( - Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass); - - const DebugLoc &DL = MI.getDebugLoc(); - MachineBasicBlock *BB = MI.getParent(); - - Register VAddr, RSrcReg, SOffset; - int64_t Offset = 0; - - unsigned Opcode; - if (selectMUBUFOffsetImpl(MI.getOperand(1), RSrcReg, SOffset, Offset)) { - Opcode = Is64 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN : - AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN; - } else if (selectMUBUFAddr64Impl(MI.getOperand(1), VAddr, - RSrcReg, SOffset, Offset)) { - Opcode = Is64 ? 
AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN : - AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN; - } else - return selectImpl(MI, *CoverageInfo); - - auto MIB = BuildMI(*BB, &MI, DL, TII.get(Opcode), TmpReg) - .addReg(MI.getOperand(2).getReg()); - - if (VAddr) - MIB.addReg(VAddr); - - MIB.addReg(RSrcReg); - if (SOffset) - MIB.addReg(SOffset); - else - MIB.addImm(0); - - MIB.addImm(Offset); - MIB.addImm(AMDGPU::CPol::GLC); - MIB.cloneMemRefs(MI); - - BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), DstReg) - .addReg(TmpReg, RegState::Kill, SubReg); - - MI.eraseFromParent(); - - MRI->setRegClass( - DstReg, Is64 ? &AMDGPU::VReg_64RegClass : &AMDGPU::VGPR_32RegClass); - return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); -} - static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) { if (Reg.isPhysical()) return false; @@ -3227,8 +3168,6 @@ case AMDGPU::G_AMDGPU_ATOMIC_FMIN: case AMDGPU::G_AMDGPU_ATOMIC_FMAX: return selectG_LOAD_STORE_ATOMICRMW(I); - case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: - return selectG_AMDGPU_ATOMIC_CMPXCHG(I); case TargetOpcode::G_SELECT: return selectG_SELECT(I); case TargetOpcode::G_TRUNC: diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1399,6 +1399,37 @@ } // end foreach RtnMode } +multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst> { + foreach RtnMode = ["ret", "noret"] in { + + defvar Op = !cast<SDPatternOperator>("AMDGPUatomic_cmp_swap_global_" # RtnMode + # "_" # vt.Size); + defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); + + defvar OffsetResDag = (!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix) + getVregSrcForVT<data_vt>.ret:$vdata_in, SReg_128:$srsrc, SCSrc_b32:$soffset, + offset:$offset); + def : Pat< + (vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), data_vt:$vdata_in)), + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG OffsetResDag, !if(!eq(vt, i32), sub0, sub0_sub1)), + OffsetResDag) + >; + + defvar Addr64ResDag = 
(!cast<MUBUF_Pseudo>(Inst # "_ADDR64" # InstSuffix) + getVregSrcForVT<data_vt>.ret:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, + SCSrc_b32:$soffset, offset:$offset); + def : Pat< + (vt (Op (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset), + data_vt:$vdata_in)), + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG Addr64ResDag, !if(!eq(vt, i32), sub0, sub0_sub1)), + Addr64ResDag) + >; + + } // end foreach RtnMode +} + foreach Ty = [i32, i64] in { defvar Suffix = !if(!eq(Ty, i64), "_X2", ""); @@ -1418,6 +1449,9 @@ } // end foreach Ty +defm : BufferAtomicCmpSwapPat<i32, v2i32, "BUFFER_ATOMIC_CMPSWAP">; +defm : BufferAtomicCmpSwapPat<i64, v2i64, "BUFFER_ATOMIC_CMPSWAP_X2">; + multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst, list<string> RtnModes = ["ret", "noret"]> { foreach RtnMode = RtnModes in { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir @@ -28,7 +28,7 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -43,7 +43,7 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; 
GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -112,7 +112,7 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -127,7 +127,7 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX7-FLAT-LABEL: name: 
amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -218,7 +218,7 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -233,7 +233,7 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -302,7 +302,7 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = 
BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -317,7 +317,7 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -418,7 +418,7 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] ; GFX7-LABEL: name: 
amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -443,7 +443,7 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -534,7 +534,7 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:av_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} @@ -548,7 +548,7 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store 
seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:av_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} @@ -611,7 +611,7 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:av_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} @@ -625,7 +625,7 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:av_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} @@ -687,7 +687,7 @@ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE 
[[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX7: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 @@ -701,7 +701,7 @@ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX7-FLAT: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 @@ -773,7 +773,7 @@ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store 
seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX7: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 @@ -787,7 +787,7 @@ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX7-FLAT: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3