This is an archive of the discontinued LLVM Phabricator instance.

[amdgpu] Fix REL32 relocations with negative offsets.
ClosedPublic

Authored by hliao on Jun 19 2020, 2:03 PM.

Download Raw Diff

Details

Reviewers

rampitec
arsenm

Commits

rG20a1700293f6: [amdgpu] Fix REL32 relocations with negative offsets.

Summary

The offset should be treated as a signed one.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

hliao created this revision.Jun 19 2020, 2:03 PM

Herald added a project: Restricted Project. · View Herald TranscriptJun 19 2020, 2:03 PM

Herald added subscribers: llvm-commits, kerbowa, hiraditya and 8 others. · View Herald Transcript

This patch only handles the case where that offset is representable in a 32-bit signed integer. For a generic 64-bit offset outside the range of a 32-bit integer, we need to revise the relocation spec to enhance REL32_HI from the original

(S + A - P) >> 32

to

(S + (A << 32) - P) >> 32

where A is that 32-bit sword addend. In effect, we split a 64-bit offset into a low 32-bit addend used in REL32_LO and a high 32-bit addend used in REL32_HI. However, that needs changes in more than one component. As that's the rare case, I want to address the current critical issue first and will then start the discussion on relocation changes.

BTW, in the real example, that negative offset is created by the LSR (loop strength reduction) pass while performing strength reduction in a loop.

GlobalISel part isn't tested

Harbormaster failed remote builds in B61096: Diff 272176!Jun 19 2020, 3:14 PM

In D82234#2104634, @arsenm wrote:

GlobalISel part isn't tested

GlobalISel doesn't have this issue yet, as it won't fold G_GLOBAL_VALUE and PTR_ADD together, so it only observes positive offsets. But we'd better change that interface as well.

If you could merge this with an existing test for this, that would be slightly preferable

This revision is now accepted and ready to land.Jun 20 2020, 1:26 PM

Closed by commit rG20a1700293f6: [amdgpu] Fix REL32 relocations with negative offsets. (authored by hliao). · Explain WhyJun 21 2020, 8:13 PM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

lib/

Target/

AMDGPU/

AMDGPULegalizerInfo.h

6 lines

AMDGPULegalizerInfo.cpp

10 lines

SIISelLowering.cpp

3 lines

test/

CodeGen/

AMDGPU/

rel32.ll

12 lines

Diff 272326

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Show First 20 Lines • Show All 55 Lines • ▼ Show 20 Lines	public:
bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,		bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;		MachineIRBuilder &B) const;
bool legalizeShuffleVector(MachineInstr &MI, MachineRegisterInfo &MRI,		bool legalizeShuffleVector(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;		MachineIRBuilder &B) const;

bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,		bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;		MachineIRBuilder &B) const;

bool buildPCRelGlobalAddress(		bool buildPCRelGlobalAddress(Register DstReg, LLT PtrTy, MachineIRBuilder &B,
Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV,		const GlobalValue *GV, int64_t Offset,
unsigned Offset, unsigned GAFlags = SIInstrInfo::MO_NONE) const;		unsigned GAFlags = SIInstrInfo::MO_NONE) const;

bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,		bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;		MachineIRBuilder &B) const;
bool legalizeLoad(MachineInstr &MI, MachineRegisterInfo &MRI,		bool legalizeLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B,		MachineIRBuilder &B,
GISelChangeObserver &Observer) const;		GISelChangeObserver &Observer) const;

bool legalizeFMad(MachineInstr &MI, MachineRegisterInfo &MRI,		bool legalizeFMad(MachineInstr &MI, MachineRegisterInfo &MRI,
▲ Show 20 Lines • Show All 105 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Show First 20 Lines • Show All 1,997 Lines • ▼ Show 20 Lines	Intrinsic::ID TrigIntrin = MI.getOpcode() == AMDGPU::G_FSIN ?
Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos;		Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos;
B.buildIntrinsic(TrigIntrin, makeArrayRef<Register>(DstReg), false)		B.buildIntrinsic(TrigIntrin, makeArrayRef<Register>(DstReg), false)
.addUse(TrigVal)		.addUse(TrigVal)
.setMIFlags(Flags);		.setMIFlags(Flags);
MI.eraseFromParent();		MI.eraseFromParent();
return true;		return true;
}		}

bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(		bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
Register DstReg, LLT PtrTy,		MachineIRBuilder &B,
MachineIRBuilder &B, const GlobalValue *GV,		const GlobalValue *GV,
unsigned Offset, unsigned GAFlags) const {		int64_t Offset,
		unsigned GAFlags) const {
		assert(isInt<32>(Offset + 4) && "32-bit offset is expected!");
// In order to support pc-relative addressing, SI_PC_ADD_REL_OFFSET is lowered		// In order to support pc-relative addressing, SI_PC_ADD_REL_OFFSET is lowered
// to the following code sequence:		// to the following code sequence:
//		//
// For constant address space:		// For constant address space:
// s_getpc_b64 s[0:1]		// s_getpc_b64 s[0:1]
// s_add_u32 s0, s0, $symbol		// s_add_u32 s0, s0, $symbol
// s_addc_u32 s1, s1, 0		// s_addc_u32 s1, s1, 0
//		//
▲ Show 20 Lines • Show All 2,368 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,455 Lines • ▼ Show 20 Lines	SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS \|\|		return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS \|\|
GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS \|\|		GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS \|\|
GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&		GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
!shouldEmitGOTReloc(GA->getGlobal());		!shouldEmitGOTReloc(GA->getGlobal());
}		}

static SDValue		static SDValue
buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,		buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
const SDLoc &DL, unsigned Offset, EVT PtrVT,		const SDLoc &DL, int64_t Offset, EVT PtrVT,
unsigned GAFlags = SIInstrInfo::MO_NONE) {		unsigned GAFlags = SIInstrInfo::MO_NONE) {
		assert(isInt<32>(Offset + 4) && "32-bit offset is expected!");
// In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is		// In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is
// lowered to the following code sequence:		// lowered to the following code sequence:
//		//
// For constant address space:		// For constant address space:
// s_getpc_b64 s[0:1]		// s_getpc_b64 s[0:1]
// s_add_u32 s0, s0, $symbol		// s_add_u32 s0, s0, $symbol
// s_addc_u32 s1, s1, 0		// s_addc_u32 s1, s1, 0
//		//
▲ Show 20 Lines • Show All 6,133 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/rel32.ll

This file was added.

				; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck %s

				@g = protected local_unnamed_addr addrspace(4) externally_initialized global i32 0, align 4

				; CHECK-LABEL: rel32_neg_offset:
				; CHECK: s_getpc_b64 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{]}}
				; CHECK: s_add_u32 s[[LO]], s[[LO]], g@rel32@lo-4
				; CHECK: s_addc_u32 s[[HI]], s[[HI]], g@rel32@hi-4
				define i32 addrspace(4)* @rel32_neg_offset() {
				%r = getelementptr i32, i32 addrspace(4)* @g, i64 -2
				ret i32 addrspace(4)* %r
				}