Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1434,23 +1434,7 @@ if (HasVSrc) { Register VSrc = MI.getOperand(1).getReg(); - - if (STI.needsAlignedVGPRs()) { - // Add implicit aligned super-reg to force alignment on the data operand. - Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef); - Register NewVR = - MRI->createVirtualRegister(&AMDGPU::VReg_64_Align2RegClass); - BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), NewVR) - .addReg(VSrc, 0, MI.getOperand(1).getSubReg()) - .addImm(AMDGPU::sub0) - .addReg(Undef) - .addImm(AMDGPU::sub1); - MIB.addReg(NewVR, 0, AMDGPU::sub0); - MIB.addReg(NewVR, RegState::Implicit); - } else { - MIB.addReg(VSrc); - } + MIB.addReg(VSrc); if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI)) return false; @@ -1459,6 +1443,8 @@ MIB.addImm(ImmOffset) .cloneMemRefs(MI); + TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0); + MI.eraseFromParent(); return true; } @@ -1753,7 +1739,9 @@ } MI.eraseFromParent(); - return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); + constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); + TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr); + return true; } bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( Index: llvm/lib/Target/AMDGPU/MIMGInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -478,6 +478,7 @@ if op.HAS_BASE then { def _V1 : MIMG_Store_Helper ; + let hasPostISelHook = 1 in def _V1_gfx90a : MIMG_Store_Helper_gfx90a ; def _V1_gfx10 : MIMG_Store_gfx10 ; + let hasPostISelHook = 1 in def _V1_gfx90a : MIMG_Atomic_gfx90a ; } if 
op.HAS_BASE then { Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4257,29 +4257,7 @@ case AMDGPU::DS_GWS_INIT: case AMDGPU::DS_GWS_SEMA_BR: case AMDGPU::DS_GWS_BARRIER: - if (Subtarget->needsAlignedVGPRs()) { - // Add implicit aligned super-reg to force alignment on the data operand. - const DebugLoc &DL = MI.getDebugLoc(); - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); - MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::data0); - Register DataReg = Op->getReg(); - bool IsAGPR = TRI->isAGPR(MRI, DataReg); - Register Undef = MRI.createVirtualRegister( - IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), Undef); - Register NewVR = - MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass - : &AMDGPU::VReg_64_Align2RegClass); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), NewVR) - .addReg(DataReg, 0, Op->getSubReg()) - .addImm(AMDGPU::sub0) - .addReg(Undef) - .addImm(AMDGPU::sub1); - Op->setReg(NewVR); - Op->setSubReg(AMDGPU::sub0); - MI.addOperand(MachineOperand::CreateReg(NewVR, false, true)); - } + TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::data0); LLVM_FALLTHROUGH; case AMDGPU::DS_GWS_SEMA_V: case AMDGPU::DS_GWS_SEMA_P: @@ -11832,8 +11810,11 @@ return; } - if (TII->isMIMG(MI) && !MI.mayStore()) - AddIMGInit(MI); + if (TII->isMIMG(MI)) { + if (!MI.mayStore()) + AddIMGInit(MI); + TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::vaddr); + } } static SDValue buildSMovImm32(SelectionDAG &DAG, const SDLoc &DL, Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1150,6 +1150,11 @@ static 
unsigned getDSShaderTypeValue(const MachineFunction &MF); const TargetSchedModel &getSchedModel() const { return SchedModel; } + + // Enforce even alignment of the operand named \p OpName if the subtarget + // requires it. This is used when the operand is a 32-bit register that + // must be even-aligned regardless. + void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const; }; /// \brief Returns true if a reg:subreg pair P has a TRC class Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4615,25 +4615,36 @@ } } - if (ST.needsAlignedVGPRs() && - (MI.getOpcode() == AMDGPU::DS_GWS_INIT || - MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR || - MI.getOpcode() == AMDGPU::DS_GWS_BARRIER)) { - const MachineOperand *Op = getNamedOperand(MI, AMDGPU::OpName::data0); - Register Reg = Op->getReg(); - bool Aligned = true; - if (Reg.isPhysical()) { - Aligned = !(RI.getHWRegIndex(Reg) & 1); - } else { + if (ST.needsAlignedVGPRs()) { + const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool { + const MachineOperand *Op = getNamedOperand(MI, OpName); + if (!Op) + return true; + Register Reg = Op->getReg(); + if (Reg.isPhysical()) + return !(RI.getHWRegIndex(Reg) & 1); const TargetRegisterClass &RC = *MRI.getRegClass(Reg); - Aligned = RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) && - !(RI.getChannelFromSubReg(Op->getSubReg()) & 1); + return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) && + !(RI.getChannelFromSubReg(Op->getSubReg()) & 1); + }; + + if (MI.getOpcode() == AMDGPU::DS_GWS_INIT || + MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR || + MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) { + + if (!isAlignedReg(AMDGPU::OpName::data0)) { + ErrInfo = "Subtarget requires even aligned vector registers " + "for DS_GWS instructions"; + return false; + } } - if (!Aligned) { - ErrInfo = "Subtarget requires even aligned 
vector registers " - "for DS_GWS instructions"; - return false; + if (isMIMG(MI)) { + if (!isAlignedReg(AMDGPU::OpName::vaddr)) { + ErrInfo = "Subtarget requires even aligned vector registers " + "for vaddr operand of image instructions"; + return false; + } } } @@ -8425,3 +8436,37 @@ return false; } + +void SIInstrInfo::enforceOperandRCAlignment(MachineInstr &MI, + unsigned OpName) const { + if (!ST.needsAlignedVGPRs()) + return; + + int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName); + if (OpNo < 0) + return; + MachineOperand &Op = MI.getOperand(OpNo); + if (getOpSize(MI, OpNo) > 4) + return; + + // Add implicit aligned super-reg to force alignment on the named operand. + const DebugLoc &DL = MI.getDebugLoc(); + MachineBasicBlock *BB = MI.getParent(); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + Register DataReg = Op.getReg(); + bool IsAGPR = RI.isAGPR(MRI, DataReg); + Register Undef = MRI.createVirtualRegister( + IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass); + BuildMI(*BB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef); + Register NewVR = + MRI.createVirtualRegister(IsAGPR ? 
&AMDGPU::AReg_64_Align2RegClass + : &AMDGPU::VReg_64_Align2RegClass); + BuildMI(*BB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewVR) + .addReg(DataReg, 0, Op.getSubReg()) + .addImm(AMDGPU::sub0) + .addReg(Undef) + .addImm(AMDGPU::sub1); + Op.setReg(NewVR); + Op.setSubReg(AMDGPU::sub0); + MI.addOperand(MachineOperand::CreateReg(NewVR, false, true)); +} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll @@ -1,9 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX900 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - %s | FileCheck -check-prefix=GFX90A %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s - define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { ; GFX6-LABEL: atomic_swap_i32_1d: ; GFX6: ; %bb.0: ; %main_body @@ -33,6 +34,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_swap_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part 
epilog +; +; GFX90A-LABEL: atomic_swap_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_swap v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_swap_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -81,6 +111,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -129,6 +188,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_sub_i32_1d: +; GFX900: ; %bb.0: ; 
%main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_sub_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_sub v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_sub_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -177,6 +265,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_smin_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_smin_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 
v2, v1 +; GFX90A-NEXT: image_atomic_smin v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_smin_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -225,6 +342,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_umin_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_umin_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_umin v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_umin_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -273,6 +419,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_smax_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt 
vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_smax_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_smax v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_smax_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -321,6 +496,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_umax_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_umax_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_umax v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_umax_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -369,6 +573,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; 
GFX900-LABEL: atomic_and_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_and_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_and v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_and_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -417,6 +650,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_or_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_or_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: 
s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_or v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_or_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -465,6 +727,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_xor_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_xor_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_xor v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_xor_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -513,6 +804,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_inc_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 
unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_inc_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_inc v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_inc_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -561,6 +881,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_dec_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_dec_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_dec v0, v2, s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_dec_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -609,6 +958,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to 
shader part epilog ; +; GFX900-LABEL: atomic_cmpswap_i32_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_cmpswap_i32_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_cmpswap_i32_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -655,6 +1032,32 @@ ; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc ; GFX8-NEXT: s_endpgm ; +; GFX900-LABEL: atomic_cmpswap_i32_1d_no_return: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_endpgm +; +; GFX90A-LABEL: atomic_cmpswap_i32_1d_no_return: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, 
s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_endpgm +; ; GFX10-LABEL: atomic_cmpswap_i32_1d_no_return: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -701,6 +1104,36 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_2d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_2d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_2d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -749,6 +1182,37 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_3d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 
unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_3d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: v_mov_b32_e32 v6, v3 +; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_3d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -797,6 +1261,37 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_cube: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_cube: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: v_mov_b32_e32 v6, v3 +; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; 
GFX10-LABEL: atomic_add_i32_cube: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -845,6 +1340,36 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_1darray: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_1darray: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_1darray: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -893,6 +1418,37 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_2darray: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: 
atomic_add_i32_2darray: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: v_mov_b32_e32 v6, v3 +; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_2darray: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -941,6 +1497,37 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_2dmsaa: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_2dmsaa: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: v_mov_b32_e32 v6, v3 +; GFX90A-NEXT: image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_2dmsaa: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ 
-989,6 +1576,38 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_2darraymsaa: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_2darraymsaa: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v6, v1 +; GFX90A-NEXT: v_mov_b32_e32 v7, v2 +; GFX90A-NEXT: v_mov_b32_e32 v8, v3 +; GFX90A-NEXT: v_mov_b32_e32 v9, v4 +; GFX90A-NEXT: image_atomic_add v0, v[6:9], s[0:7] dmask:0x1 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_2darraymsaa: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1037,6 +1656,35 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i32_1d_slc: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i32_1d_slc: +; GFX90A: 
; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: v_mov_b32_e32 v2, v1 +; GFX90A-NEXT: image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc slc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i32_1d_slc: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1085,6 +1733,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_swap_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_swap_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_swap_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1133,6 +1809,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 
+; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1181,6 +1885,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_sub_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_sub_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to 
shader part epilog +; ; GFX10-LABEL: atomic_sub_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1229,6 +1961,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_smin_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_smin_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_smin_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1277,6 +2037,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_umin_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_umin_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; 
GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_umin_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1325,6 +2113,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_smax_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_smax_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_smax_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1373,6 +2189,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_umax_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, 
s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_umax_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_umax_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1421,6 +2265,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_and_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_and_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_and_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; 
GFX10-NEXT: s_mov_b32 s0, s2 @@ -1469,6 +2341,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_or_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_or_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_or_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1517,6 +2417,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_xor_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_xor_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; 
GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_xor_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1565,6 +2493,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_inc_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_inc_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_inc_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1613,6 +2569,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_dec_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; 
GFX900-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_dec_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_dec_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1661,6 +2645,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_cmpswap_i64_1d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_cmpswap_i64_1d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_cmpswap_i64_1d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1707,6 +2719,32 @@ ; GFX8-NEXT: image_atomic_cmpswap 
v[0:3], v4, s[0:7] dmask:0xf unorm glc ; GFX8-NEXT: s_endpgm ; +; GFX900-LABEL: atomic_cmpswap_i64_1d_no_return: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc +; GFX900-NEXT: s_endpgm +; +; GFX90A-LABEL: atomic_cmpswap_i64_1d_no_return: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc +; GFX90A-NEXT: s_endpgm +; ; GFX10-LABEL: atomic_cmpswap_i64_1d_no_return: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1753,6 +2791,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_2d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_2d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, 
s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_2d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1801,6 +2867,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_3d: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_3d: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_3d: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1849,6 +2943,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_cube: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; 
GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_cube: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_cube: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1897,6 +3019,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_1darray: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_1darray: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_1darray: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1945,6 +3095,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_2darray: +; 
GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_2darray: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_2darray: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -1993,6 +3171,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_2dmsaa: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_2dmsaa: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; 
GFX90A-NEXT: image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_2dmsaa: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -2041,6 +3247,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_2darraymsaa: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_2darraymsaa: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_2darraymsaa: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 @@ -2089,6 +3323,34 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ; return to shader part epilog ; +; GFX900-LABEL: atomic_add_i64_1d_slc: +; GFX900: ; %bb.0: ; %main_body +; GFX900-NEXT: s_mov_b32 s0, s2 +; GFX900-NEXT: s_mov_b32 s1, s3 +; GFX900-NEXT: s_mov_b32 s2, s4 +; GFX900-NEXT: s_mov_b32 s3, s5 +; GFX900-NEXT: s_mov_b32 s4, s6 +; GFX900-NEXT: s_mov_b32 s5, s7 +; GFX900-NEXT: s_mov_b32 s6, s8 +; GFX900-NEXT: s_mov_b32 s7, s9 +; GFX900-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc +; 
GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: ; return to shader part epilog +; +; GFX90A-LABEL: atomic_add_i64_1d_slc: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_mov_b32 s0, s2 +; GFX90A-NEXT: s_mov_b32 s1, s3 +; GFX90A-NEXT: s_mov_b32 s2, s4 +; GFX90A-NEXT: s_mov_b32 s3, s5 +; GFX90A-NEXT: s_mov_b32 s4, s6 +; GFX90A-NEXT: s_mov_b32 s5, s7 +; GFX90A-NEXT: s_mov_b32 s6, s8 +; GFX90A-NEXT: s_mov_b32 s7, s9 +; GFX90A-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: ; return to shader part epilog +; ; GFX10-LABEL: atomic_add_i64_1d_slc: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: s_mov_b32 s0, s2 Index: llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir +++ llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir @@ -73,7 +73,7 @@ %0:vreg_64_align2 = COPY $vgpr0_vgpr1 %1:vgpr_32 = COPY $vgpr2 - %2:vreg_160_align2 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %2:vreg_160_align2 = IMAGE_LOAD_V5_V1 %0.sub0, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec ... 
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll @@ -1,10 +1,11 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX6789 %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX6789 %s -; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX90A %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s ; GCN-LABEL: {{^}}atomic_swap_1d: ; GFX6789: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_swap v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -15,6 +16,7 @@ ; GCN-LABEL: {{^}}atomic_swap_1d_i64: ; GFX6789: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc{{$}} +; GFX90A: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc{{$}} ; GFX10: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps <2 x float> @atomic_swap_1d_i64(<8 x i32> inreg %rsrc, i64 %data, i32 %s) { main_body: @@ -25,6 +27,7 @@ ; GCN-LABEL: {{^}}atomic_add_1d: ; GFX6789: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_add v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -35,6 
+38,7 @@ ; GCN-LABEL: {{^}}atomic_sub_1d: ; GFX6789: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_sub v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -45,6 +49,7 @@ ; GCN-LABEL: {{^}}atomic_smin_1d: ; GFX6789: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_smin v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -55,6 +60,7 @@ ; GCN-LABEL: {{^}}atomic_umin_1d: ; GFX6789: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_umin v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -65,6 +71,7 @@ ; GCN-LABEL: {{^}}atomic_smax_1d: ; GFX6789: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_smax v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -75,6 +82,7 @@ ; GCN-LABEL: {{^}}atomic_umax_1d: ; GFX6789: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_umax v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -85,6 +93,7 @@ ; GCN-LABEL: {{^}}atomic_and_1d: ; GFX6789: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; 
GFX90A: image_atomic_and v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -95,6 +104,7 @@ ; GCN-LABEL: {{^}}atomic_or_1d: ; GFX6789: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_or v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -105,6 +115,7 @@ ; GCN-LABEL: {{^}}atomic_xor_1d: ; GFX6789: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_xor v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -115,6 +126,7 @@ ; GCN-LABEL: {{^}}atomic_inc_1d: ; GFX6789: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_inc v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -125,6 +137,7 @@ ; GCN-LABEL: {{^}}atomic_dec_1d: ; GFX6789: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_dec v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: @@ -135,6 +148,7 @@ ; GCN-LABEL: {{^}}atomic_cmpswap_1d: ; GFX6789: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc{{$}} +; GFX90A: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc{{$}} ; GFX10: image_atomic_cmpswap v[0:1], v2, s[0:7] 
dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) { main_body: @@ -145,6 +159,7 @@ ; GCN-LABEL: {{^}}atomic_cmpswap_1d_64: ; GFX6789: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}} +; GFX90A: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}} ; GFX10: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ; define amdgpu_ps <2 x float> @atomic_cmpswap_1d_64(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) { main_body: @@ -155,6 +170,7 @@ ; GCN-LABEL: {{^}}atomic_add_2d: ; GFX6789: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_add v0, v[{{[02468]}}:{{[13579]}}], s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc ; define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) { main_body: @@ -165,6 +181,7 @@ ; GCN-LABEL: {{^}}atomic_add_3d: ; GFX6789: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_add v0, v[{{[02468]}}:{{[02468]}}], s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc ; define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) { main_body: @@ -175,6 +192,7 @@ ; GCN-LABEL: {{^}}atomic_add_cube: ; GFX6789: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX90A: image_atomic_add v0, v[{{[02468]}}:{{[02468]}}], s[0:7] dmask:0x1 unorm glc da{{$}} ; GFX10: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc ; define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) { main_body: @@ -185,6 +203,7 @@ ; GCN-LABEL: {{^}}atomic_add_1darray: ; GFX6789: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX90A: image_atomic_add v0, 
v[{{[02468]}}:{{[13579]}}], s[0:7] dmask:0x1 unorm glc da{{$}} ; GFX10: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc ; define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) { main_body: @@ -195,6 +214,7 @@ ; GCN-LABEL: {{^}}atomic_add_2darray: ; GFX6789: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX90A: image_atomic_add v0, v[{{[02468]}}:{{[02468]}}], s[0:7] dmask:0x1 unorm glc da{{$}} ; GFX10: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc ; define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) { main_body: @@ -205,6 +225,7 @@ ; GCN-LABEL: {{^}}atomic_add_2dmsaa: ; GFX6789: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc{{$}} +; GFX90A: image_atomic_add v0, v[{{[02468]}}:{{[02468]}}], s[0:7] dmask:0x1 unorm glc{{$}} ; GFX10: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc ; define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) { main_body: @@ -215,6 +236,7 @@ ; GCN-LABEL: {{^}}atomic_add_2darraymsaa: ; GFX6789: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}} +; GFX90A: image_atomic_add v0, v[{{[02468]}}:{{[13579]}}], s[0:7] dmask:0x1 unorm glc da{{$}} ; GFX10: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc ; define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) { main_body: @@ -225,6 +247,7 @@ ; GCN-LABEL: {{^}}atomic_add_1d_slc: ; GFX6789: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc{{$}} +; GFX90A: image_atomic_add v0, v{{[02468]}}, s[0:7] dmask:0x1 unorm glc slc{{$}} ; GFX10: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc ; define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { main_body: Index: 
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s ; GCN-LABEL: {{^}}load_1d: ; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}} @@ -97,6 +98,16 @@ ret <4 x float> %v } +; GCN-LABEL: {{^}}load_1d_addr_align: +; GCN: v_mov_b32_e32 [[VADDR:v[0-9]?[02468]]], v1 +; GCN: image_load v[0:3], [[VADDR]], s[0:7] dmask:0xf unorm{{$}} +define amdgpu_ps <4 x float> @load_1d_addr_align(<8 x i32> inreg %rsrc, <2 x i32> %s) { +main_body: + %s1 = extractelement <2 x i32> %s, i32 1 + %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s1, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + ; GCN-LABEL: {{^}}store_1d: ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}} define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) { @@ -178,7 +189,8 @@ } ; GCN-LABEL: {{^}}store_1d_V1: -; GCN: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}} +; GCN: v_mov_b32_e32 [[VADDR:v[0-9]?[02468]]], v1 +; GCN: image_store v0, [[VADDR]], s[0:7] dmask:0x2 unorm{{$}} define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) { main_body: call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) @@ -256,7 +268,7 @@ } ; GCN-LABEL: image_load_mmo -; GCN: image_load v1, v[2:3], s[0:7] dmask:0x1 unorm +; GCN: image_load v1, v[{{[0-9:]+}}], s[0:7] dmask:0x1 unorm define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 { store float 0.000000e+00, float addrspace(3)* %lds %c0 = extractelement <2 x i32> %c, i32 0 @@ 
-267,6 +279,16 @@ ret float %tex } +; GCN-LABEL: {{^}}getresinfo_1d: +; GCN: v_mov_b32_e32 [[VADDR:v[0-9]?[02468]]], v1 +; GCN: image_get_resinfo v[0:3], [[VADDR]], s[0:7] dmask:0xf unorm +define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i32> %s) { +main_body: + %s1 = extractelement <2 x i32> %s, i32 1 + %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %s1, <8 x i32> %rsrc, i32 0, i32 0) + ret <4 x float> %v +} + declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1 declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1 declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1 @@ -301,6 +323,8 @@ declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0 declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0 +declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2 + attributes #0 = { nounwind } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A %s +; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,SDAG %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,GISEL %s ; GFX90A-LABEL: {{^}}sample_1d: ; GFX90A-NOT: s_wqm_b64 @@ -66,6 +67,20 @@ ret <4 x float> %v } +; Address register must be even aligned.
+ +; GFX90A-LABEL: {{^}}sample_1d_addr_align: +; GFX90A: v_mov_b32_e32 [[VADDR:v[0-9]?[02468]]], v1 +; SDAG: image_sample v{{[0-9]+}}, [[VADDR]], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0x1 +; GISEL: image_sample v[{{[0-9:]+}}], [[VADDR]], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf +define amdgpu_ps float @sample_1d_addr_align(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <2 x float> %s) { +main_body: + %s1 = extractelement <2 x float> %s, i32 1 + %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + %v1 = extractelement <4 x float> %v, i32 0 + ret float %v1 +} + declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) Index: llvm/test/CodeGen/AMDGPU/verify-image-vaddr-align.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/verify-image-vaddr-align.mir @@ -0,0 +1,27 @@ +# RUN: not --crash llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -run-pass=machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX90A-ERR %s + +# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for vaddr operand of image instructions *** +# GFX90A-ERR: %4:vgpr_32 = IMAGE_SAMPLE_V1_V1_gfx90a %0.sub1:vreg_128_align2 +# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for vaddr operand of image instructions *** +# GFX90A-ERR: $vgpr0 = IMAGE_SAMPLE_V1_V1_gfx90a $vgpr1, +# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for vaddr operand of image instructions *** +# GFX90A-ERR: %5:vgpr_32 = IMAGE_LOAD_V1_V1_gfx90a %0.sub1:vreg_128_align2 +# GFX90A-ERR: *** Bad machine code: 
Subtarget requires even aligned vector registers for vaddr operand of image instructions *** +# GFX90A-ERR: IMAGE_STORE_V1_V1_gfx90a $vgpr1, +# GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for vaddr operand of image instructions *** +# GFX90A-ERR: %6:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a %6:vgpr_32(tied-def 0), $vgpr1, +--- +name: image_sample_odd_vgpr +body: | + bb.0: + %0:vreg_128_align2 = IMPLICIT_DEF + %1:areg_128_align2 = IMPLICIT_DEF + %2:sgpr_256 = IMPLICIT_DEF + %3:sgpr_128 = IMPLICIT_DEF + + %4:vgpr_32 = IMAGE_SAMPLE_V1_V1_gfx90a %0.sub1, %2, %3, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + $vgpr0 = IMAGE_SAMPLE_V1_V1_gfx90a $vgpr1, %2, %3, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + %5:vgpr_32 = IMAGE_LOAD_V1_V1_gfx90a %0.sub1, %2, 8, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "ImageResource") + IMAGE_STORE_V1_V1_gfx90a $vgpr1, %5, %2, 2, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "ImageResource") + %6:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a %6:vgpr_32, $vgpr1, %2, 1, -1, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") +...