Diff 343200

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Show First 20 Lines • Show All 1,789 Lines • ▼ Show 20 Lines	bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {		if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();		int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
const SIInstrInfo *TII = Subtarget->getInstrInfo();		const SIInstrInfo *TII = Subtarget->getInstrInfo();

if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,		if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
SIInstrFlags::FlatGlobal)) {		SIInstrFlags::FlatGlobal)) {
Addr = LHS;		Addr = LHS;
ImmOffset = COffsetVal;		ImmOffset = COffsetVal;
} else if (!LHS->isDivergent() && COffsetVal > 0) {		} else if (!LHS->isDivergent()) {
		if (COffsetVal > 0) {
SDLoc SL(N);		SDLoc SL(N);
// saddr + large_offset -> saddr + (voffset = large_offset & ~MaxOffset) +		// saddr + large_offset -> saddr +
		// (voffset = large_offset & ~MaxOffset) +
// (large_offset & MaxOffset);		// (large_offset & MaxOffset);
int64_t SplitImmOffset, RemainderOffset;		int64_t SplitImmOffset, RemainderOffset;
std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(		std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);		COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);

if (isUInt<32>(RemainderOffset)) {		if (isUInt<32>(RemainderOffset)) {
SDNode *VMov = CurDAG->getMachineNode(		SDNode *VMov = CurDAG->getMachineNode(
AMDGPU::V_MOV_B32_e32, SL, MVT::i32,		AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));		CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
VOffset = SDValue(VMov, 0);		VOffset = SDValue(VMov, 0);
SAddr = LHS;		SAddr = LHS;
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);		Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
return true;		return true;
}		}
}		}
}

// Match the variable offset.		// We are adding a 64 bit SGPR and a constant. If constant bus limit
if (Addr.getOpcode() != ISD::ADD) {		// is 1 we would need to perform 1 or 2 extra moves for each half of
if (Addr->isDivergent() \|\| Addr.getOpcode() == ISD::UNDEF \|\|		// the constant and it is better to do a scalar add and then issue a
isa<ConstantSDNode>(Addr))		// single VALU instruction to materialize zero. Otherwise it is less
		// instructions to perform VALU adds with immediates or inline literals.
		unsigned NumLiterals =
		!TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
		!TII->isInlineConstant(APInt(32, COffsetVal >> 32));
		if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
return false;		return false;
		}
// It's cheaper to materialize a single 32-bit zero for vaddr than the two
// moves required to copy a 64-bit SGPR to VGPR.
SAddr = Addr;
SDNode *VMov = CurDAG->getMachineNode(
AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
VOffset = SDValue(VMov, 0);
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
return true;
}		}

		// Match the variable offset.
		if (Addr.getOpcode() == ISD::ADD) {
LHS = Addr.getOperand(0);		LHS = Addr.getOperand(0);
RHS = Addr.getOperand(1);		RHS = Addr.getOperand(1);

if (!LHS->isDivergent()) {		if (!LHS->isDivergent()) {
// add (i64 sgpr), (zero_extend (i32 vgpr))		// add (i64 sgpr), (zero_extend (i32 vgpr))
if (SDValue ZextRHS = matchZExtFromI32(RHS)) {		if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
SAddr = LHS;		SAddr = LHS;
VOffset = ZextRHS;		VOffset = ZextRHS;
}		}
}		}

if (!SAddr && !RHS->isDivergent()) {		if (!SAddr && !RHS->isDivergent()) {
// add (zero_extend (i32 vgpr)), (i64 sgpr)		// add (zero_extend (i32 vgpr)), (i64 sgpr)
if (SDValue ZextLHS = matchZExtFromI32(LHS)) {		if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
SAddr = RHS;		SAddr = RHS;
VOffset = ZextLHS;		VOffset = ZextLHS;
}		}
}		}

if (!SAddr)		if (SAddr) {
		Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
		return true;
		}
		}

		if (Addr->isDivergent() \|\| Addr.getOpcode() == ISD::UNDEF \|\|
		isa<ConstantSDNode>(Addr))
return false;		return false;

		// It's cheaper to materialize a single 32-bit zero for vaddr than the two
		// moves required to copy a 64-bit SGPR to VGPR.
		SAddr = Addr;
		SDNode *VMov =
		CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
		CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
		VOffset = SDValue(VMov, 0);
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);		Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
return true;		return true;
}		}

static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {		static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {		if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));		SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
} else if (SAddr.getOpcode() == ISD::ADD &&		} else if (SAddr.getOpcode() == ISD::ADD &&
▲ Show 20 Lines • Show All 1,284 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Show First 20 Lines • Show All 3,522 Lines • ▼ Show 20 Lines	AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
// possible.		// possible.
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);		std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

if (ConstOffset != 0) {		if (ConstOffset != 0) {
if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS,		if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS,
SIInstrFlags::FlatGlobal)) {		SIInstrFlags::FlatGlobal)) {
Addr = PtrBase;		Addr = PtrBase;
ImmOffset = ConstOffset;		ImmOffset = ConstOffset;
} else if (ConstOffset > 0) {		} else {
auto PtrBaseDef = getDefSrcRegIgnoringCopies(PtrBase, *MRI);		auto PtrBaseDef = getDefSrcRegIgnoringCopies(PtrBase, *MRI);
if (!PtrBaseDef)		if (!PtrBaseDef)
return None;		return None;

if (isSGPR(PtrBaseDef->Reg)) {		if (isSGPR(PtrBaseDef->Reg)) {
		if (ConstOffset > 0) {
// Offset is too large.		// Offset is too large.
//		//
// saddr + large_offset -> saddr + (voffset = large_offset & ~MaxOffset)		// saddr + large_offset -> saddr +
// + (large_offset & MaxOffset);		// (voffset = large_offset & ~MaxOffset) +
		// (large_offset & MaxOffset);
int64_t SplitImmOffset, RemainderOffset;		int64_t SplitImmOffset, RemainderOffset;
std::tie(SplitImmOffset, RemainderOffset) = TII.splitFlatOffset(		std::tie(SplitImmOffset, RemainderOffset) = TII.splitFlatOffset(
ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);		ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);

if (isUInt<32>(RemainderOffset)) {		if (isUInt<32>(RemainderOffset)) {
MachineInstr *MI = Root.getParent();		MachineInstr *MI = Root.getParent();
MachineBasicBlock *MBB = MI->getParent();		MachineBasicBlock *MBB = MI->getParent();
Register HighBits		Register HighBits =
= MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);		MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),		BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
HighBits)		HighBits)
.addImm(RemainderOffset);		.addImm(RemainderOffset);

return {{		return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); }, // saddr		[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); }, // saddr
[=](MachineInstrBuilder &MIB) { MIB.addReg(HighBits); }, // voffset		[=](MachineInstrBuilder &MIB) {
		MIB.addReg(HighBits);
		}, // voffset
[=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },		[=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
}};		}};
}		}
}		}

		// We are adding a 64 bit SGPR and a constant. If constant bus limit
		// is 1 we would need to perform 1 or 2 extra moves for each half of
		// the constant and it is better to do a scalar add and then issue a
		// single VALU instruction to materialize zero. Otherwise it is less
		// instructions to perform VALU adds with immediates or inline literals.
		unsigned NumLiterals =
		!TII.isInlineConstant(APInt(32, ConstOffset & 0xffffffff)) +
		!TII.isInlineConstant(APInt(32, ConstOffset >> 32));
		if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
		return None;
		}
}		}
}		}

auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);		auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
if (!AddrDef)		if (!AddrDef)
return None;		return None;

// Match the variable offset.		// Match the variable offset.
if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD) {		if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
// FIXME: We should probably have folded COPY (G_IMPLICIT_DEF) earlier, and
// drop this.
if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF \|\|
AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT)
return None;

// It's cheaper to materialize a single 32-bit zero for vaddr than the two
// moves required to copy a 64-bit SGPR to VGPR.
const Register SAddr = AddrDef->Reg;
if (!isSGPR(SAddr))
return None;

MachineInstr *MI = Root.getParent();
MachineBasicBlock *MBB = MI->getParent();
Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
VOffset)
.addImm(0);

return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); }, // saddr
[=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); }, // voffset
[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
}};
}

// Look through the SGPR->VGPR copy.		// Look through the SGPR->VGPR copy.
Register SAddr =		Register SAddr =
getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI);		getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI);
if (!SAddr \|\| !isSGPR(SAddr))
return None;

		if (SAddr && isSGPR(SAddr)) {
Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();		Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

// It's possible voffset is an SGPR here, but the copy to VGPR will be		// It's possible voffset is an SGPR here, but the copy to VGPR will be
// inserted later.		// inserted later.
Register VOffset = matchZeroExtendFromS32(*MRI, PtrBaseOffset);		if (Register VOffset = matchZeroExtendFromS32(*MRI, PtrBaseOffset)) {
if (!VOffset)
return None;

return {{[=](MachineInstrBuilder &MIB) { // saddr		return {{[=](MachineInstrBuilder &MIB) { // saddr
MIB.addReg(SAddr);		MIB.addReg(SAddr);
},		},
[=](MachineInstrBuilder &MIB) { // voffset		[=](MachineInstrBuilder &MIB) { // voffset
MIB.addReg(VOffset);		MIB.addReg(VOffset);
},		},
[=](MachineInstrBuilder &MIB) { // offset		[=](MachineInstrBuilder &MIB) { // offset
MIB.addImm(ImmOffset);		MIB.addImm(ImmOffset);
}}};		}}};
}		}
		}
		}

		// FIXME: We should probably have folded COPY (G_IMPLICIT_DEF) earlier, and
		// drop this.
		if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF \|\|
		AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT \|\| !isSGPR(AddrDef->Reg))
		return None;

		// It's cheaper to materialize a single 32-bit zero for vaddr than the two
		// moves required to copy a 64-bit SGPR to VGPR.
		MachineInstr *MI = Root.getParent();
		MachineBasicBlock *MBB = MI->getParent();
		Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

		BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
		.addImm(0);

		return {{
		[=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); }, // saddr
		[=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); }, // voffset
		[=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
		}};
		}

InstructionSelector::ComplexRendererFns		InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {		AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
Register Addr = Root.getReg();		Register Addr = Root.getReg();
Register PtrBase;		Register PtrBase;
int64_t ConstOffset;		int64_t ConstOffset;
int64_t ImmOffset = 0;		int64_t ImmOffset = 0;

▲ Show 20 Lines • Show All 760 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir

Show First 20 Lines • Show All 344 Lines • ▼ Show 20 Lines	bb.0:
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1		; GFX9: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0		; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0		; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX9: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1		; GFX9: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX9: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1		; GFX9: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc		; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc		; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1		; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]		; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)		; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]		; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097		; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097
; GFX10: liveins: $sgpr0_sgpr1		; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1		; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294963199		; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294963199
; GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1		; GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1		; GFX10: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0		; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX10: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0		; GFX10: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
▲ Show 20 Lines • Show All 184 Lines • ▼ Show 20 Lines	bb.0:
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1		; GFX9: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0		; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0		; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
; GFX9: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1		; GFX9: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; GFX9: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1		; GFX9: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc		; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc		; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1		; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]		; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)		; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]		; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390		; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390
; GFX10: liveins: $sgpr0_sgpr1		; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1		; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094		; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094
; GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1		; GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1		; GFX10: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0		; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; GFX10: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0		; GFX10: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
▲ Show 20 Lines • Show All 164 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/global-saddr-load.ll

Show First 20 Lines • Show All 81 Lines • ▼ Show 20 Lines
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:-4096		; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] offset:-4096
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog		; GFX9-NEXT: ; return to shader part epilog
;		;
; GFX10-LABEL: global_load_saddr_i8_offset_neg4096:		; GFX10-LABEL: global_load_saddr_i8_offset_neg4096:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2		; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]		; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off		; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
		arsenmUnsubmitted Done Reply Inline Actions This is more instructions arsenm: This is more instructions
		rampitecAuthorUnsubmitted Done Reply Inline Actions It is one materialized zero, I think it is a more or less degenerate case. In the real world there is usually a vgpr with zero. On the other hand this is one VGPR less. rampitec: It is one materialized zero, I think it is a more or less degenerate case. In the real world there…
		arsenmUnsubmitted Done Reply Inline Actions This is the same pattern in all of the cases with a large immediate offset split. I think this is reasonably common and I would lean towards fewer instructions here arsenm: This is the same pattern in all of the cases with a large immediate offset split. I think this…
		rampitecAuthorUnsubmitted Done Reply Inline Actions We still need to materialize zero, but I got the idea, will check what is possible to do here. One caveat with a non-zero vaddr is that it will be impossible to switch to the vaddr form if the calculations end up in sgprs (and they likely will in these cases) but for any reason we need to move the instruction to VALU, which in turn will result in readfirstlane instructions for the address, as it is now. rampitec: We still need to materialize zero, but I got the idea, will check what is possible to do here.
; GFX10-NEXT: ; return to shader part epilog		; GFX10-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 -4096		%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 -4096
%load = load i8, i8 addrspace(1)* %gep0		%load = load i8, i8 addrspace(1)* %gep0
%zext = zext i8 %load to i32		%zext = zext i8 %load to i32
%to.vgpr = bitcast i32 %zext to float		%to.vgpr = bitcast i32 %zext to float
ret float %to.vgpr		ret float %to.vgpr
}		}

; SGPR base with maximum negative gfx9 immediate offset -1		; SGPR base with maximum negative gfx9 immediate offset -1
define amdgpu_ps float @global_load_saddr_i8_offset_neg4097(i8 addrspace(1)* inreg %sbase) {		define amdgpu_ps float @global_load_saddr_i8_offset_neg4097(i8 addrspace(1)* inreg %sbase) {
; GFX9-LABEL: global_load_saddr_i8_offset_neg4097:		; GFX9-LABEL: global_load_saddr_i8_offset_neg4097:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s2		; GFX9-NEXT: s_add_u32 s0, s2, 0xffffefff
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0		; GFX9-NEXT: s_addc_u32 s1, s3, -1
; GFX9-NEXT: v_mov_b32_e32 v1, s3		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1]
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog		; GFX9-NEXT: ; return to shader part epilog
;		;
; GFX10-LABEL: global_load_saddr_i8_offset_neg4097:		; GFX10-LABEL: global_load_saddr_i8_offset_neg4097:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2		; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]		; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1		; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog		; GFX10-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 -4097		%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 -4097
%load = load i8, i8 addrspace(1)* %gep0		%load = load i8, i8 addrspace(1)* %gep0
%zext = zext i8 %load to i32		%zext = zext i8 %load to i32
%to.vgpr = bitcast i32 %zext to float		%to.vgpr = bitcast i32 %zext to float
ret float %to.vgpr		ret float %to.vgpr
}		}

; SGPR base with maximum negative gfx9 immediate offset -2		; SGPR base with maximum negative gfx9 immediate offset -2
define amdgpu_ps float @global_load_saddr_i8_offset_neg4098(i8 addrspace(1)* inreg %sbase) {		define amdgpu_ps float @global_load_saddr_i8_offset_neg4098(i8 addrspace(1)* inreg %sbase) {
; GFX9-LABEL: global_load_saddr_i8_offset_neg4098:		; GFX9-LABEL: global_load_saddr_i8_offset_neg4098:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s2		; GFX9-NEXT: s_add_u32 s0, s2, 0xffffeffe
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0		; GFX9-NEXT: s_addc_u32 s1, s3, -1
; GFX9-NEXT: v_mov_b32_e32 v1, s3		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1]
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog		; GFX9-NEXT: ; return to shader part epilog
;		;
; GFX10-LABEL: global_load_saddr_i8_offset_neg4098:		; GFX10-LABEL: global_load_saddr_i8_offset_neg4098:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2		; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]		; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2		; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2
▲ Show 20 Lines • Show All 200 Lines • ▼ Show 20 Lines	; GFX10-NEXT: ; return to shader part epilog
%zext = zext i8 %load to i32		%zext = zext i8 %load to i32
%to.vgpr = bitcast i32 %zext to float		%to.vgpr = bitcast i32 %zext to float
ret float %to.vgpr		ret float %to.vgpr
}		}

define amdgpu_ps float @global_load_saddr_i8_offset_4294971391(i8 addrspace(1)* inreg %sbase) {		define amdgpu_ps float @global_load_saddr_i8_offset_4294971391(i8 addrspace(1)* inreg %sbase) {
; GFX9-LABEL: global_load_saddr_i8_offset_4294971391:		; GFX9-LABEL: global_load_saddr_i8_offset_4294971391:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, s3		; GFX9-NEXT: s_add_u32 s0, s2, 0xfff
; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s2		; GFX9-NEXT: s_addc_u32 s1, s3, 1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog		; GFX9-NEXT: ; return to shader part epilog
;		;
; GFX10-LABEL: global_load_saddr_i8_offset_4294971391:		; GFX10-LABEL: global_load_saddr_i8_offset_4294971391:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0x800, s2		; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0x800, s2
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1]		; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1]
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047		; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog		; GFX10-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 4294971391		%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 4294971391
%load = load i8, i8 addrspace(1)* %gep0		%load = load i8, i8 addrspace(1)* %gep0
%zext = zext i8 %load to i32		%zext = zext i8 %load to i32
%to.vgpr = bitcast i32 %zext to float		%to.vgpr = bitcast i32 %zext to float
ret float %to.vgpr		ret float %to.vgpr
}		}

define amdgpu_ps float @global_load_saddr_i8_offset_4294971392(i8 addrspace(1)* inreg %sbase) {		define amdgpu_ps float @global_load_saddr_i8_offset_4294971392(i8 addrspace(1)* inreg %sbase) {
; GFX9-LABEL: global_load_saddr_i8_offset_4294971392:		; GFX9-LABEL: global_load_saddr_i8_offset_4294971392:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s2		; GFX9-NEXT: s_add_u32 s0, s2, 0x1000
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0		; GFX9-NEXT: s_addc_u32 s1, s3, 1
; GFX9-NEXT: v_mov_b32_e32 v1, s3		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1]
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog		; GFX9-NEXT: ; return to shader part epilog
;		;
; GFX10-LABEL: global_load_saddr_i8_offset_4294971392:		; GFX10-LABEL: global_load_saddr_i8_offset_4294971392:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0x1000, s2		; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0x1000, s2
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1]		; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1]
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off		; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
▲ Show 20 Lines • Show All 1,991 Lines • ▼ Show 20 Lines	; GCN-NEXT: ; return to shader part epilog
%addr = inttoptr i64 %or to i8 addrspace(1)*		%addr = inttoptr i64 %or to i8 addrspace(1)*
%load = load i8, i8 addrspace(1)* %addr		%load = load i8, i8 addrspace(1)* %addr
%zext = zext i8 %load to i32		%zext = zext i8 %load to i32
%to.vgpr = bitcast i32 %zext to float		%to.vgpr = bitcast i32 %zext to float
ret float %to.vgpr		ret float %to.vgpr
}		}

; --------------------------------------------------------------------------------		; --------------------------------------------------------------------------------
; Full 64-bit scalar add.		; Full 64-bit scalar add.
		foadUnsubmitted Done Reply Inline Actions Could you pre-commit this bit so we can see the diff please? foad: Could you pre-commit this bit so we can see the diff please?
; --------------------------------------------------------------------------------		; --------------------------------------------------------------------------------

define amdgpu_ps void @global_addr_64bit_lsr_iv(float addrspace(1)* inreg %arg) {		define amdgpu_ps void @global_addr_64bit_lsr_iv(float addrspace(1)* inreg %arg) {
; GFX9-LABEL: global_addr_64bit_lsr_iv:		; GFX9-LABEL: global_addr_64bit_lsr_iv:
; GFX9: ; %bb.0: ; %bb		; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_mov_b64 s[0:1], 0		; GFX9-NEXT: s_mov_b64 s[0:1], 0
		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: BB128_1: ; %bb3		; GFX9-NEXT: BB128_1: ; %bb3
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1		; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_add_u32 s4, s2, s0		; GFX9-NEXT: s_add_u32 s4, s2, s0
; GFX9-NEXT: s_addc_u32 s5, s3, s1		; GFX9-NEXT: s_addc_u32 s5, s3, s1
; GFX9-NEXT: v_mov_b32_e32 v0, s4		; GFX9-NEXT: global_load_dword v1, v0, s[4:5] glc
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: global_load_dword v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 s0, s0, 4		; GFX9-NEXT: s_add_u32 s0, s0, 4
; GFX9-NEXT: s_addc_u32 s1, s1, 0		; GFX9-NEXT: s_addc_u32 s1, s1, 0
; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x400		; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x400
; GFX9-NEXT: s_cbranch_scc0 BB128_1		; GFX9-NEXT: s_cbranch_scc0 BB128_1
; GFX9-NEXT: ; %bb.2: ; %bb2		; GFX9-NEXT: ; %bb.2: ; %bb2
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_addr_64bit_lsr_iv:		; GFX10-LABEL: global_addr_64bit_lsr_iv:
; GFX10: ; %bb.0: ; %bb		; GFX10: ; %bb.0: ; %bb
		; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_mov_b64 s[0:1], 0		; GFX10-NEXT: s_mov_b64 s[0:1], 0
; GFX10-NEXT: BB128_1: ; %bb3		; GFX10-NEXT: BB128_1: ; %bb3
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1		; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
		; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_add_u32 s4, s2, s0		; GFX10-NEXT: s_add_u32 s4, s2, s0
; GFX10-NEXT: s_addc_u32 s5, s3, s1		; GFX10-NEXT: s_addc_u32 s5, s3, s1
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: v_mov_b32_e32 v1, s5
; GFX10-NEXT: s_add_u32 s0, s0, 4		; GFX10-NEXT: s_add_u32 s0, s0, 4
		; GFX10-NEXT: global_load_dword v1, v0, s[4:5] glc dlc
		; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_addc_u32 s1, s1, 0		; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: s_cmpk_eq_i32 s0, 0x400		; GFX10-NEXT: s_cmpk_eq_i32 s0, 0x400
; GFX10-NEXT: global_load_dword v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_cbranch_scc0 BB128_1		; GFX10-NEXT: s_cbranch_scc0 BB128_1
; GFX10-NEXT: ; %bb.2: ; %bb2		; GFX10-NEXT: ; %bb.2: ; %bb2
; GFX10-NEXT: s_endpgm		; GFX10-NEXT: s_endpgm
bb:		bb:
br label %bb3		br label %bb3

bb2: ; preds = %bb3		bb2: ; preds = %bb3
ret void		ret void

bb3: ; preds = %bb3, %bb		bb3: ; preds = %bb3, %bb
%i = phi i32 [ 0, %bb ], [ %i8, %bb3 ]		%i = phi i32 [ 0, %bb ], [ %i8, %bb3 ]
%i4 = zext i32 %i to i64		%i4 = zext i32 %i to i64
%i5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %i4		%i5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %i4
%i6 = load volatile float, float addrspace(1)* %i5, align 4		%i6 = load volatile float, float addrspace(1)* %i5, align 4
%i8 = add nuw nsw i32 %i, 1		%i8 = add nuw nsw i32 %i, 1
%i9 = icmp eq i32 %i8, 256		%i9 = icmp eq i32 %i8, 256
br i1 %i9, label %bb2, label %bb3		br i1 %i9, label %bb2, label %bb3
}		}

		; Make sure we only have a single zero vaddr initialization.

define amdgpu_ps void @global_addr_64bit_lsr_iv_multiload(float addrspace(1)* inreg %arg, float addrspace(1)* inreg %arg.1) {		define amdgpu_ps void @global_addr_64bit_lsr_iv_multiload(float addrspace(1)* inreg %arg, float addrspace(1)* inreg %arg.1) {
; GFX9-LABEL: global_addr_64bit_lsr_iv_multiload:		; GFX9-LABEL: global_addr_64bit_lsr_iv_multiload:
; GFX9: ; %bb.0: ; %bb		; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_mov_b64 s[0:1], 0		; GFX9-NEXT: s_mov_b64 s[0:1], 0
		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: BB129_1: ; %bb3		; GFX9-NEXT: BB129_1: ; %bb3
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1		; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_add_u32 s4, s2, s0		; GFX9-NEXT: s_add_u32 s4, s2, s0
; GFX9-NEXT: s_addc_u32 s5, s3, s1		; GFX9-NEXT: s_addc_u32 s5, s3, s1
; GFX9-NEXT: v_mov_b32_e32 v0, s4		; GFX9-NEXT: global_load_dword v1, v0, s[4:5] glc
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: global_load_dword v2, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_load_dword v2, v[0:1], off glc		; GFX9-NEXT: global_load_dword v1, v0, s[4:5] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_u32 s0, s0, 4		; GFX9-NEXT: s_add_u32 s0, s0, 4
; GFX9-NEXT: s_addc_u32 s1, s1, 0		; GFX9-NEXT: s_addc_u32 s1, s1, 0
; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x400		; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x400
; GFX9-NEXT: ; kill: killed $vgpr0_vgpr1		; GFX9-NEXT: ; kill: killed $sgpr4 killed $sgpr5
; GFX9-NEXT: s_cbranch_scc0 BB129_1		; GFX9-NEXT: s_cbranch_scc0 BB129_1
; GFX9-NEXT: ; %bb.2: ; %bb2		; GFX9-NEXT: ; %bb.2: ; %bb2
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_addr_64bit_lsr_iv_multiload:		; GFX10-LABEL: global_addr_64bit_lsr_iv_multiload:
; GFX10: ; %bb.0: ; %bb		; GFX10: ; %bb.0: ; %bb
		; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_mov_b64 s[0:1], 0		; GFX10-NEXT: s_mov_b64 s[0:1], 0
; GFX10-NEXT: BB129_1: ; %bb3		; GFX10-NEXT: BB129_1: ; %bb3
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1		; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
		; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_add_u32 s4, s2, s0		; GFX10-NEXT: s_add_u32 s4, s2, s0
; GFX10-NEXT: s_addc_u32 s5, s3, s1		; GFX10-NEXT: s_addc_u32 s5, s3, s1
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: v_mov_b32_e32 v1, s5
; GFX10-NEXT: s_add_u32 s0, s0, 4		; GFX10-NEXT: s_add_u32 s0, s0, 4
; GFX10-NEXT: s_addc_u32 s1, s1, 0		; GFX10-NEXT: global_load_dword v1, v0, s[4:5] glc dlc
; GFX10-NEXT: s_cmpk_eq_i32 s0, 0x400
; GFX10-NEXT: ; kill: killed $vgpr0_vgpr1
; GFX10-NEXT: global_load_dword v2, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_load_dword v2, v[0:1], off glc dlc		; GFX10-NEXT: global_load_dword v1, v0, s[4:5] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
		; GFX10-NEXT: s_addc_u32 s1, s1, 0
		; GFX10-NEXT: s_cmpk_eq_i32 s0, 0x400
		; GFX10-NEXT: ; kill: killed $sgpr4 killed $sgpr5
; GFX10-NEXT: s_cbranch_scc0 BB129_1		; GFX10-NEXT: s_cbranch_scc0 BB129_1
; GFX10-NEXT: ; %bb.2: ; %bb2		; GFX10-NEXT: ; %bb.2: ; %bb2
; GFX10-NEXT: s_endpgm		; GFX10-NEXT: s_endpgm
bb:		bb:
br label %bb3		br label %bb3

bb2: ; preds = %bb3		bb2: ; preds = %bb3
ret void		ret void
Show All 15 Lines

llvm/test/CodeGen/AMDGPU/global_atomics.ll

Show All 35 Lines

; GCN-LABEL: {{^}}atomic_add_i32_huge_offset:		; GCN-LABEL: {{^}}atomic_add_i32_huge_offset:
; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac		; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac
; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd		; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd
; SI: buffer_atomic_add v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_atomic_add v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}

; VI: flat_atomic_add		; VI: flat_atomic_add

; GFX9: v_mov_b32_e32 [[HIGH_K:v[0-9]+]], 0xabcd		; GFX9: s_add_u32 s[[LOW_K:[0-9]+]], s{{[0-9]+}}, 0xdeac
; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xd000,		; GFX9: s_addc_u32 s[[HIGH_K:[0-9]+]], s{{[0-9]+}}, 0xabcd
; GFX9-NEXT: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, [[HIGH_K]], v{{[0-9]+}}, vcc		; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[LOW_K]]:[[HIGH_K]]]{{$}}
; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:3756{{$}}
define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {		define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
entry:		entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595		%gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595

%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:		; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}		; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}		; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {		define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:		entry:
%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:		; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}		; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}		; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}		; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}		; GFX9: global_atomic_add [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s		; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s
define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
Show All 19 Lines	entry:
%val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst		%val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_add_i32_addr64:		; GCN-LABEL: {{^}}atomic_add_i32_addr64:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}		; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:		; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}		; GFX9: global_atomic_add [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

Show All 20 Lines	entry:
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:		; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}		; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}		; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}		; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}		; GFX9: global_atomic_and [[RET:v[0-9]]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}
Show All 19 Lines	entry:
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_and_i32_addr64:		; GCN-LABEL: {{^}}atomic_and_i32_addr64:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}		; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:		; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}		; GFX9: global_atomic_and [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

Show All 20 Lines	entry:
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:		; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}		; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}		; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}		; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}		; GFX9: global_atomic_sub [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}
Show All 19 Lines	entry:
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64:		; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}}		; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:		; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}		; GFX9: global_atomic_sub [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

Show All 20 Lines	entry:
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:		; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}		; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}		; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}		; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}		; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}
Show All 19 Lines	entry:
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_max_i32_addr64:		; GCN-LABEL: {{^}}atomic_max_i32_addr64:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}		; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:		; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}		; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

Show All 19 Lines	entry:
%val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:		; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}		; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}		; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}		; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}		; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}
Show All 18 Lines	entry:
%val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst		%val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64:		; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}		; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:		; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}		; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

Show All 19 Lines	entry:
%val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:		; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}		; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16		; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16
define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}		; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}		; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}
Show All 18 Lines	entry:
%val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst		%val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_min_i32_addr64:		; GCN-LABEL: {{^}}atomic_min_i32_addr64:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}		; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:		; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}		; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

Show All 19 Lines	entry:
%val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:		; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}		; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}		; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}		; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}		; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}
Show All 17 Lines	entry:
%val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst		%val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umin_i32_addr64:		; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}		; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:		; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}		; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

Show All 19 Lines	entry:
%val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:		; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}		; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16		; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16
define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}		; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}		; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}
Show All 18 Lines	entry:
%val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst		%val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_or_i32_addr64:		; GCN-LABEL: {{^}}atomic_or_i32_addr64:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}		; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:		; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}		; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

Show All 30 Lines	entry:
%val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:		; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}		; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}		; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}		; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}		; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}
Show All 17 Lines	entry:
%val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst		%val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:		; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}		; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:		; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}		; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

Show All 21 Lines	entry:
store i32 %extract0, i32 addrspace(1)* %out2		store i32 %extract0, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:		; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}		; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}

; VI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}		; VI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: global_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16{{$}}		; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {		define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst		%val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}		; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}		; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword v[[RET]]		; SIVI: buffer_store_dword v[[RET]]

; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}		; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {		define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst		%val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
%extract0 = extractvalue { i32, i1 } %val, 0		%extract0 = extractvalue { i32, i1 } %val, 0
store i32 %extract0, i32 addrspace(1)* %out2		store i32 %extract0, i32 addrspace(1)* %out2
ret void		ret void
Show All 20 Lines	entry:
%extract0 = extractvalue { i32, i1 } %val, 0		%extract0 = extractvalue { i32, i1 } %val, 0
store i32 %extract0, i32 addrspace(1)* %out2		store i32 %extract0, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:		; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}		; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}		; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {		define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst		%val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:		; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}		; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword v[[RET]]		; SIVI: buffer_store_dword v[[RET]]

; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off glc{{$}}		; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {		define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst		%val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
%extract0 = extractvalue { i32, i1 } %val, 0		%extract0 = extractvalue { i32, i1 } %val, 0
store i32 %extract0, i32 addrspace(1)* %out2		store i32 %extract0, i32 addrspace(1)* %out2
ret void		ret void
}		}
Show All 20 Lines	entry:
%val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:		; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}		; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}		; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}		; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}		; GFX9: global_atomic_xor [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst		%val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}
Show All 17 Lines	entry:
%val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst		%val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xor_i32_addr64:		; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}		; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off{{$}}		; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:		; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}		; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc{{$}}		; GFX9: global_atomic_xor [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {		define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst		%val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %val, i32 addrspace(1)* %out2		store i32 %val, i32 addrspace(1)* %out2
ret void		ret void
}		}

▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines	entry:
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:		; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}		; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}		; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}		; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {		define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
%val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4		%val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
store i32 %val, i32 addrspace(1)* %out		store i32 %val, i32 addrspace(1)* %out
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_load_i32_addr64:		; GCN-LABEL: {{^}}atomic_load_i32_addr64:
; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}		; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off glc{{$}}		; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {		define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
%val = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4		%val = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4
store i32 %val, i32 addrspace(1)* %out		store i32 %val, i32 addrspace(1)* %out
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset:		; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}		; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}		; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]		; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}		; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_f32_addr64_offset(float addrspace(1)* %in, float addrspace(1)* %out, i64 %index) {		define amdgpu_kernel void @atomic_load_f32_addr64_offset(float addrspace(1)* %in, float addrspace(1)* %out, i64 %index) {
entry:		entry:
%ptr = getelementptr float, float addrspace(1)* %in, i64 %index		%ptr = getelementptr float, float addrspace(1)* %in, i64 %index
%gep = getelementptr float, float addrspace(1)* %ptr, i64 4		%gep = getelementptr float, float addrspace(1)* %ptr, i64 4
%val = load atomic float, float addrspace(1)* %gep seq_cst, align 4		%val = load atomic float, float addrspace(1)* %gep seq_cst, align 4
store float %val, float addrspace(1)* %out		store float %val, float addrspace(1)* %out
ret void		ret void
}		}
Show All 27 Lines
entry:		entry:
store atomic float %in, float addrspace(1)* %out seq_cst, align 4		store atomic float %in, float addrspace(1)* %out seq_cst, align 4
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:		; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}		; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}		; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}		; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {		define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4		%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4		store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset:		; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}		; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}		; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}		; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float addrspace(1)* %out, i64 %index) {		define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float addrspace(1)* %out, i64 %index) {
entry:		entry:
%ptr = getelementptr float, float addrspace(1)* %out, i64 %index		%ptr = getelementptr float, float addrspace(1)* %out, i64 %index
%gep = getelementptr float, float addrspace(1)* %ptr, i64 4		%gep = getelementptr float, float addrspace(1)* %ptr, i64 4
store atomic float %in, float addrspace(1)* %gep seq_cst, align 4		store atomic float %in, float addrspace(1)* %gep seq_cst, align 4
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_store_i32_addr64:		; GCN-LABEL: {{^}}atomic_store_i32_addr64:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}		; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}		; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {		define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
entry:		entry:
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index		%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4		store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_store_f32_addr64:		; GCN-LABEL: {{^}}atomic_store_f32_addr64:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}		; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}		; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float addrspace(1)* %out, i64 %index) {		define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float addrspace(1)* %out, i64 %index) {
entry:		entry:
%ptr = getelementptr float, float addrspace(1)* %out, i64 %index		%ptr = getelementptr float, float addrspace(1)* %out, i64 %index
store atomic float %in, float addrspace(1)* %ptr seq_cst, align 4		store atomic float %in, float addrspace(1)* %ptr seq_cst, align 4
ret void		ret void
}		}

llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll

Show All 23 Lines	entry:
%tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset:		; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset:
; CI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}		; CI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}		; VI: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: global_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}		; GFX9: global_atomic_add_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset:
; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}		; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}		; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}
Show All 17 Lines	entry:
%tmp0 = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst		%tmp0 = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_add_i64_addr64:		; GCN-LABEL: {{^}}atomic_add_i64_addr64:
; CI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; CI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}		; GFX9: global_atomic_add_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_add_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_add_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile add i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile add i64 addrspace(1)* %ptr, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64:		; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64:
; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}		; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile add i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile add i64 addrspace(1)* %ptr, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

Show All 18 Lines	entry:
%tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset:		; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset:
; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}		; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}		; GFX9: global_atomic_and_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset:
; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}		; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}		; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile and i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}
Show All 17 Lines	entry:
%tmp0 = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst		%tmp0 = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_and_i64_addr64:		; GCN-LABEL: {{^}}atomic_and_i64_addr64:
; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}		; GFX9: global_atomic_and_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_and_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_and_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile and i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile and i64 addrspace(1)* %ptr, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64:		; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64:
; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}		; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile and i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile and i64 addrspace(1)* %ptr, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

Show All 18 Lines	entry:
%tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset:		; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset:
; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}		; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}		; GFX9: global_atomic_sub_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset:
; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}		; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}		; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}
Show All 17 Lines	entry:
%tmp0 = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst		%tmp0 = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64:		; GCN-LABEL: {{^}}atomic_sub_i64_addr64:
; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}		; GFX9: global_atomic_sub_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_sub_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_sub_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile sub i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile sub i64 addrspace(1)* %ptr, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64:		; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64:
; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}		; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile sub i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile sub i64 addrspace(1)* %ptr, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

Show All 18 Lines	entry:
%tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset:		; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset:
; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}		; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}		; GFX9: global_atomic_smax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset:
; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}		; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}		; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile max i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}
Show All 17 Lines	entry:
%tmp0 = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst		%tmp0 = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_max_i64_addr64:		; GCN-LABEL: {{^}}atomic_max_i64_addr64:
; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}		; GFX9: global_atomic_smax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_max_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_max_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64:		; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64:
; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}		; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

Show All 18 Lines	entry:
%tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset:		; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset:
; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}		; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}		; GFX9: global_atomic_umax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset:
; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}		; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}		; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile umax i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}
Show All 17 Lines	entry:
%tmp0 = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst		%tmp0 = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64:		; GCN-LABEL: {{^}}atomic_umax_i64_addr64:
; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}		; GFX9: global_atomic_umax_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_umax_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_umax_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64:		; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64:
; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}		; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

Show All 18 Lines	entry:
%tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset:		; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset:
; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}		; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}		; GFX9: global_atomic_smin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset:
; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}		; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}		; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile min i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}
Show All 17 Lines	entry:
%tmp0 = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst		%tmp0 = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_min_i64_addr64:		; GCN-LABEL: {{^}}atomic_min_i64_addr64:
; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}		; GFX9: global_atomic_smin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_min_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_min_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64:		; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64:
; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}		; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

Show All 19 Lines	entry:
%tmp0 = atomicrmw volatile umin i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile umin i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset:		; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset:
; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}		; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}		; GFX9: global_atomic_umin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile umin i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile umin i64 addrspace(1)* %gep, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset:
; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}		; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}		; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile umin i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile umin i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}
Show All 17 Lines	entry:
%tmp0 = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst		%tmp0 = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umin_i64_addr64:		; GCN-LABEL: {{^}}atomic_umin_i64_addr64:
; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}		; GFX9: global_atomic_umin_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_umin_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_umin_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64:		; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64:
; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}		; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

Show All 18 Lines	entry:
%tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset:		; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset:
; CI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}		; CI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}		; GFX9: global_atomic_or_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset:
; CI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}		; CI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}		; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile or i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}
Show All 17 Lines	entry:
%tmp0 = atomicrmw volatile or i64 addrspace(1)* %out, i64 %in seq_cst		%tmp0 = atomicrmw volatile or i64 addrspace(1)* %out, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_or_i64_addr64:		; GCN-LABEL: {{^}}atomic_or_i64_addr64:
; CI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; CI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}		; GFX9: global_atomic_or_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_or_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_or_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile or i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile or i64 addrspace(1)* %ptr, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64:		; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64:
; CI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; CI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}		; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile or i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile or i64 addrspace(1)* %ptr, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

Show All 30 Lines	entry:
%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset:		; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset:
; CI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}		; CI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}		; VI: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}		; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset:
; CI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}		; CI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}		; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}
Show All 17 Lines	entry:
%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %out, i64 %in seq_cst		%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %out, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xchg_i64_addr64:		; GCN-LABEL: {{^}}atomic_xchg_i64_addr64:
; CI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; CI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}		; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_xchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %ptr, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64:		; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64:
; CI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; CI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}		; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %ptr, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

Show All 18 Lines	entry:
%tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset:		; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset:
; CI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}		; CI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}		; GFX9: global_atomic_xor_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset:
; CI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}		; CI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}		; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, i64 %in seq_cst		%tmp0 = atomicrmw volatile xor i64 addrspace(1)* %gep, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}
Show All 17 Lines	entry:
%tmp0 = atomicrmw volatile xor i64 addrspace(1)* %out, i64 %in seq_cst		%tmp0 = atomicrmw volatile xor i64 addrspace(1)* %out, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xor_i64_addr64:		; GCN-LABEL: {{^}}atomic_xor_i64_addr64:
; CI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; CI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}		; VI: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; GFX9: global_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}}		; GFX9: global_atomic_xor_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_xor_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_xor_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile xor i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile xor i64 addrspace(1)* %ptr, i64 %in seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64:		; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64:
; CI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; CI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}		; VI: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off glc{{$}}		; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {		define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%tmp0 = atomicrmw volatile xor i64 addrspace(1)* %ptr, i64 %in seq_cst		%tmp0 = atomicrmw volatile xor i64 addrspace(1)* %ptr, i64 %in seq_cst
store i64 %tmp0, i64 addrspace(1)* %out2		store i64 %tmp0, i64 addrspace(1)* %out2
ret void		ret void
}		}

Show All 33 Lines	entry:
%extract0 = extractvalue { i64, i1 } %val, 0		%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, i64 addrspace(1)* %out2		store i64 %extract0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset:		; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset:
; CI: buffer_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}		; CI: buffer_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}		; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX9: global_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], off offset:32{{$}}		; GFX9: global_atomic_cmpswap_x2 v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) {		define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst		%val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:		; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}		; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}		; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:		; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:

; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:32 glc{{$}}		; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {		define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst		%val = cmpxchg volatile i64 addrspace(1)* %gep, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0		%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, i64 addrspace(1)* %out2		store i64 %extract0, i64 addrspace(1)* %out2
ret void		ret void
Show All 19 Lines	entry:
%extract0 = extractvalue { i64, i1 } %val, 0		%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, i64 addrspace(1)* %out2		store i64 %extract0, i64 addrspace(1)* %out2
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64:		; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64:
; CI: buffer_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; CI: buffer_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}		; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}		; GFX9: global_atomic_cmpswap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) {		define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%val = cmpxchg volatile i64 addrspace(1)* %ptr, i64 %old, i64 %in seq_cst seq_cst		%val = cmpxchg volatile i64 addrspace(1)* %ptr, i64 %old, i64 %in seq_cst seq_cst
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:		; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}		; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:		; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]:

; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off glc{{$}}		; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {		define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%val = cmpxchg volatile i64 addrspace(1)* %ptr, i64 %old, i64 %in seq_cst seq_cst		%val = cmpxchg volatile i64 addrspace(1)* %ptr, i64 %old, i64 %in seq_cst seq_cst
%extract0 = extractvalue { i64, i1 } %val, 0		%extract0 = extractvalue { i64, i1 } %val, 0
store i64 %extract0, i64 addrspace(1)* %out2		store i64 %extract0, i64 addrspace(1)* %out2
ret void		ret void
}		}
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines	entry:
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset:		; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}		; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}		; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], off offset:32 glc{{$}}		; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}}
define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {		define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
%val = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8		%val = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
store i64 %val, i64 addrspace(1)* %out		store i64 %val, i64 addrspace(1)* %out
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_load_i64_addr64:		; GCN-LABEL: {{^}}atomic_load_i64_addr64:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}		; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}		; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], off glc{{$}}		; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_load_i64_addr64(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {		define amdgpu_kernel void @atomic_load_i64_addr64(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index
%val = load atomic i64, i64 addrspace(1)* %ptr seq_cst, align 8		%val = load atomic i64, i64 addrspace(1)* %ptr seq_cst, align 8
store i64 %val, i64 addrspace(1)* %out		store i64 %val, i64 addrspace(1)* %out
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_load_f64_addr64_offset:		; GCN-LABEL: {{^}}atomic_load_f64_addr64_offset:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}		; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}		; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]		; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], off offset:32 glc{{$}}		; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}}
define amdgpu_kernel void @atomic_load_f64_addr64_offset(double addrspace(1)* %in, double addrspace(1)* %out, i64 %index) {		define amdgpu_kernel void @atomic_load_f64_addr64_offset(double addrspace(1)* %in, double addrspace(1)* %out, i64 %index) {
entry:		entry:
%ptr = getelementptr double, double addrspace(1)* %in, i64 %index		%ptr = getelementptr double, double addrspace(1)* %in, i64 %index
%gep = getelementptr double, double addrspace(1)* %ptr, i64 4		%gep = getelementptr double, double addrspace(1)* %ptr, i64 4
%val = load atomic double, double addrspace(1)* %gep seq_cst, align 8		%val = load atomic double, double addrspace(1)* %gep seq_cst, align 8
store double %val, double addrspace(1)* %out		store double %val, double addrspace(1)* %out
ret void		ret void
}		}
Show All 17 Lines
entry:		entry:
store atomic i64 %in, i64 addrspace(1)* %out seq_cst, align 8		store atomic i64 %in, i64 addrspace(1)* %out seq_cst, align 8
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset:		; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset:
; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}		; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}		; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], off offset:32{{$}}		; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}] offset:32{{$}}
define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(1)* %out, i64 %index) {		define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(1)* %out, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4		%gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
store atomic i64 %in, i64 addrspace(1)* %gep seq_cst, align 8		store atomic i64 %in, i64 addrspace(1)* %gep seq_cst, align 8
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_store_i64_addr64:		; GCN-LABEL: {{^}}atomic_store_i64_addr64:
; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}		; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}		; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], off{{$}}		; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64 addrspace(1)* %out, i64 %index) {		define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64 addrspace(1)* %out, i64 %index) {
entry:		entry:
%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index		%ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
store atomic i64 %in, i64 addrspace(1)* %ptr seq_cst, align 8		store atomic i64 %in, i64 addrspace(1)* %ptr seq_cst, align 8
ret void		ret void
}		}

; GCN-LABEL: {{^}}atomic_store_f64_addr64_offset:		; GCN-LABEL: {{^}}atomic_store_f64_addr64_offset:
; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}		; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}		; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], off offset:32{{$}}		; GFX9: global_store_dwordx2 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}] offset:32{{$}}
define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, double addrspace(1)* %out, i64 %index) {		define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, double addrspace(1)* %out, i64 %index) {
entry:		entry:
%ptr = getelementptr double, double addrspace(1)* %out, i64 %index		%ptr = getelementptr double, double addrspace(1)* %out, i64 %index
%gep = getelementptr double, double addrspace(1)* %ptr, i64 4		%gep = getelementptr double, double addrspace(1)* %ptr, i64 4
store atomic double %in, double addrspace(1)* %gep seq_cst, align 8		store atomic double %in, double addrspace(1)* %gep seq_cst, align 8
ret void		ret void
}		}

llvm/test/CodeGen/AMDGPU/offset-split-global.ll

Show First 20 Lines • Show All 758 Lines • ▼ Show 20 Lines	; GFX10-NEXT: s_endpgm
store i8 %load, i8 addrspace(1)* undef		store i8 %load, i8 addrspace(1)* undef
ret void		ret void
}		}

define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max:		; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0		; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000
; GFX9-NEXT: v_mov_b32_e32 v1, s1		; GFX9-NEXT: s_addc_u32 s1, s1, -1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_neg_13bit_max:		; GFX10-LABEL: global_inst_salu_offset_neg_13bit_max:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
▲ Show 20 Lines • Show All 113 Lines • ▼ Show 20 Lines	; GFX10-NEXT: s_endpgm
store i8 %load, i8 addrspace(1)* undef		store i8 %load, i8 addrspace(1)* undef
ret void		ret void
}		}

define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max:		; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0		; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000
; GFX9-NEXT: v_mov_b32_e32 v1, s1		; GFX9-NEXT: s_addc_u32 s1, s1, -1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_2x_neg_12bit_max:		; GFX10-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0		; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0
; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0		; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc		; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off		; GFX10-NEXT: global_store_byte v[0:1], v0, off
; GFX10-NEXT: s_endpgm		; GFX10-NEXT: s_endpgm
%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192		%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
%load = load volatile i8, i8 addrspace(1)* %gep, align 1		%load = load volatile i8, i8 addrspace(1)* %gep, align 1
store i8 %load, i8 addrspace(1)* undef		store i8 %load, i8 addrspace(1)* undef
ret void		ret void
}		}

define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max:		; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0		; GFX9-NEXT: s_add_u32 s0, s0, 0xffffc000
; GFX9-NEXT: v_mov_b32_e32 v1, s1		; GFX9-NEXT: s_addc_u32 s1, s1, -1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_2x_neg_13bit_max:		; GFX10-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
Show All 9 Lines	; GFX10-NEXT: s_endpgm
ret void		ret void
}		}

; Fill 11-bit low-bits (1ull << 33) \| 2047		; Fill 11-bit low-bits (1ull << 33) \| 2047
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0:		; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s1		; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff
; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0		; GFX9-NEXT: s_addc_u32 s1, s1, 2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split0:		; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split0:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
Show All 9 Lines	; GFX10-NEXT: s_endpgm
ret void		ret void
}		}

; Fill 11-bit low-bits (1ull << 33) \| 2048		; Fill 11-bit low-bits (1ull << 33) \| 2048
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1:		; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s1		; GFX9-NEXT: s_add_u32 s0, s0, 0x800
; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0		; GFX9-NEXT: s_addc_u32 s1, s1, 2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split1:		; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split1:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
Show All 9 Lines	; GFX10-NEXT: s_endpgm
ret void		ret void
}		}

; Fill 12-bit low-bits (1ull << 33) \| 4095		; Fill 12-bit low-bits (1ull << 33) \| 4095
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0:		; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s1		; GFX9-NEXT: s_add_u32 s0, s0, 0xfff
; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0		; GFX9-NEXT: s_addc_u32 s1, s1, 2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split0:		; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split0:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
Show All 9 Lines	; GFX10-NEXT: s_endpgm
ret void		ret void
}		}

; Fill 12-bit low-bits (1ull << 33) \| 4096		; Fill 12-bit low-bits (1ull << 33) \| 4096
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1:		; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0		; GFX9-NEXT: s_add_u32 s0, s0, 0x1000
; GFX9-NEXT: v_mov_b32_e32 v1, s1		; GFX9-NEXT: s_addc_u32 s1, s1, 2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split1:		; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split1:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
Show All 9 Lines	; GFX10-NEXT: s_endpgm
ret void		ret void
}		}

; Fill 13-bit low-bits (1ull << 33) \| 8191		; Fill 13-bit low-bits (1ull << 33) \| 8191
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0:		; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0		; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
; GFX9-NEXT: v_mov_b32_e32 v1, s1		; GFX9-NEXT: s_addc_u32 s1, s1, 2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split0:		; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split0:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
Show All 9 Lines	; GFX10-NEXT: s_endpgm
ret void		ret void
}		}

; Fill 13-bit low-bits (1ull << 33) \| 8192		; Fill 13-bit low-bits (1ull << 33) \| 8192
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1:		; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0		; GFX9-NEXT: s_add_u32 s0, s0, 0x2000
; GFX9-NEXT: v_mov_b32_e32 v1, s1		; GFX9-NEXT: s_addc_u32 s1, s1, 2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split1:		; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split1:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
Show All 9 Lines	; GFX10-NEXT: s_endpgm
ret void		ret void
}		}

; Fill 11-bit low-bits, negative high bits (1ull << 63) \| 2047		; Fill 11-bit low-bits, negative high bits (1ull << 63) \| 2047
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:		; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0		; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff
; GFX9-NEXT: v_mov_b32_e32 v2, s1		; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2049 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:		; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, s1		; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, s0		; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo		; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off		; GFX10-NEXT: global_store_byte v[0:1], v0, off
; GFX10-NEXT: s_endpgm		; GFX10-NEXT: s_endpgm
%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761		%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
%load = load volatile i8, i8 addrspace(1)* %gep, align 1		%load = load volatile i8, i8 addrspace(1)* %gep, align 1
store i8 %load, i8 addrspace(1)* undef		store i8 %load, i8 addrspace(1)* undef
ret void		ret void
}		}

; Fill 11-bit low-bits, negative high bits (1ull << 63) \| 2048		; Fill 11-bit low-bits, negative high bits (1ull << 63) \| 2048
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:		; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0		; GFX9-NEXT: s_add_u32 s0, s0, 0x800
; GFX9-NEXT: v_mov_b32_e32 v2, s1		; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:		; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, s1		; GFX10-NEXT: s_add_u32 s0, s0, 0x800
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, s0		; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo		; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off		; GFX10-NEXT: global_store_byte v[0:1], v0, off
; GFX10-NEXT: s_endpgm		; GFX10-NEXT: s_endpgm
%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760		%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
%load = load volatile i8, i8 addrspace(1)* %gep, align 1		%load = load volatile i8, i8 addrspace(1)* %gep, align 1
store i8 %load, i8 addrspace(1)* undef		store i8 %load, i8 addrspace(1)* undef
ret void		ret void
}		}

; Fill 12-bit low-bits, negative high bits (1ull << 63) \| 4095		; Fill 12-bit low-bits, negative high bits (1ull << 63) \| 4095
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:		; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0		; GFX9-NEXT: s_add_u32 s0, s0, 0xfff
; GFX9-NEXT: v_mov_b32_e32 v2, s1		; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:		; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, s1		; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, s0		; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo		; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off		; GFX10-NEXT: global_store_byte v[0:1], v0, off
; GFX10-NEXT: s_endpgm		; GFX10-NEXT: s_endpgm
%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713		%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
%load = load volatile i8, i8 addrspace(1)* %gep, align 1		%load = load volatile i8, i8 addrspace(1)* %gep, align 1
store i8 %load, i8 addrspace(1)* undef		store i8 %load, i8 addrspace(1)* undef
ret void		ret void
}		}

; Fill 12-bit low-bits, negative high bits (1ull << 63) \| 4096		; Fill 12-bit low-bits, negative high bits (1ull << 63) \| 4096
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:		; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0		; GFX9-NEXT: s_add_u32 s0, s0, 0x1000
; GFX9-NEXT: v_mov_b32_e32 v2, s1		; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:		; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, s1		; GFX10-NEXT: s_add_u32 s0, s0, 0x1000
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, s0		; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo		; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off		; GFX10-NEXT: global_store_byte v[0:1], v0, off
; GFX10-NEXT: s_endpgm		; GFX10-NEXT: s_endpgm
%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712		%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
%load = load volatile i8, i8 addrspace(1)* %gep, align 1		%load = load volatile i8, i8 addrspace(1)* %gep, align 1
store i8 %load, i8 addrspace(1)* undef		store i8 %load, i8 addrspace(1)* undef
ret void		ret void
}		}

; Fill 13-bit low-bits, negative high bits (1ull << 63) \| 8191		; Fill 13-bit low-bits, negative high bits (1ull << 63) \| 8191
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:		; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0		; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
; GFX9-NEXT: v_mov_b32_e32 v2, s1		; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:		; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, s1		; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, s0		; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo		; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off		; GFX10-NEXT: global_store_byte v[0:1], v0, off
; GFX10-NEXT: s_endpgm		; GFX10-NEXT: s_endpgm
%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617		%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
%load = load volatile i8, i8 addrspace(1)* %gep, align 1		%load = load volatile i8, i8 addrspace(1)* %gep, align 1
store i8 %load, i8 addrspace(1)* undef		store i8 %load, i8 addrspace(1)* undef
ret void		ret void
}		}

; Fill 13-bit low-bits, negative high bits (1ull << 63) \| 8192		; Fill 13-bit low-bits, negative high bits (1ull << 63) \| 8192
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {		define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:		; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX9: ; %bb.0:		; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1		; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)		; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0		; GFX9-NEXT: s_add_u32 s0, s0, 0x2000
; GFX9-NEXT: v_mov_b32_e32 v2, s1		; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0		; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)		; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off		; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm		; GFX9-NEXT: s_endpgm
;		;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:		; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX10: ; %bb.0:		; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24		; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
		; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)		; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, s1		; GFX10-NEXT: s_add_u32 s0, s0, 0x2000
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, s0		; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo		; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)		; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off		; GFX10-NEXT: global_store_byte v[0:1], v0, off
; GFX10-NEXT: s_endpgm		; GFX10-NEXT: s_endpgm
%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616		%gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
%load = load volatile i8, i8 addrspace(1)* %gep, align 1		%load = load volatile i8, i8 addrspace(1)* %gep, align 1
store i8 %load, i8 addrspace(1)* undef		store i8 %load, i8 addrspace(1)* undef
ret void		ret void
}		}

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Improve global SADDR selection
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 343200

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir

llvm/test/CodeGen/AMDGPU/global-saddr-load.ll

llvm/test/CodeGen/AMDGPU/global_atomics.ll

llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll

llvm/test/CodeGen/AMDGPU/offset-split-global.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Improve global SADDR selectionClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 343200

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir

llvm/test/CodeGen/AMDGPU/global-saddr-load.ll

llvm/test/CodeGen/AMDGPU/global_atomics.ll

llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll

llvm/test/CodeGen/AMDGPU/offset-split-global.ll

[AMDGPU] Improve global SADDR selection
ClosedPublic