This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/
-
CodeGen/GlobalISel/
-
GlobalISel/
-
GISelKnownBits.cpp
-
Target/AMDGPU/
-
AMDGPU/
-
SIISelLowering.h
1
SIISelLowering.cpp
-
unittests/CodeGen/GlobalISel/
-
CodeGen/
-
GlobalISel/
-
KnownBitsTest.cpp

Differential D81297

AMDGPU: Implement computeKnownAlignForTargetInstr
ClosedPublic

Authored by arsenm on Jun 5 2020, 12:41 PM.

Download Raw Diff

Details

Reviewers

hliao
foad
kerbowa
nhaehnle

Summary

We probably need to move where intrinsics are lowered to copies to
make this useful.

Diff Detail

Event Timeline

arsenm created this revision. Jun 5 2020, 12:41 PM

Herald added a project: Restricted Project. · View Herald Transcript · Jun 5 2020, 12:41 PM

Herald added subscribers: hiraditya, t-tye, tpr and 5 others. · View Herald Transcript

hliao added inline comments. Jun 8 2020, 7:40 AM

llvm/lib/Target/AMDGPU/SIISelLowering.cpp
11233	Try `Intrinsic::getAttributes`

Get alignment from intrinsic declaration. This can probably go in generic code, but I'm unclear what the semantics of a call site with a lower alignment is

In D81297#2085078, @arsenm wrote:

This can probably go in generic code, but I'm unclear what the semantics of a call site with a lower alignment is

IR has the same issue and the solution adopted in D65281 is that any attribute on the call site takes priority.

ping

Why not put it in generic code to start with? I don't understand why the concern about conflicting alignment would prevent that. I mean, are you more confident about how to handle conflicting alignments for AMDGPU than you are for all other targets?

In D81297#2097573, @foad wrote:

Why not put it in generic code to start with? I don't understand why the concern about conflicting alignment would prevent that. I mean, are you more confident about how to handle conflicting alignments for AMDGPU than you are for all other targets?

I know in this case, even if the call site specified a lower alignment, you're always getting at least 4. For other targets, I don't know if a lower call site alignment would need to be respected. Ultimately we need an assert_align pseudo to track the call site attribute

I'd still be inclined to either follow the precedent of IR, or take the max of the two alignments. But I guess this is OK if you're reluctant to do that.

This revision is now accepted and ready to land. Jun 17 2020, 6:13 AM

In D81297#2097987, @foad wrote:

I'd still be inclined to either follow the precedent of IR, or take the max of the two alignments. But I guess this is OK if you're reluctant to do that.

From the context of just the intrinsic itself, we don't have the option

95605b784b350b9c51fd4f15f312b50566cab30d

Revision Contents

Path

Size

llvm/

lib/

CodeGen/

GlobalISel/

GISelKnownBits.cpp

2 lines

Target/

AMDGPU/

SIISelLowering.h

3 lines

SIISelLowering.cpp

22 lines

unittests/

CodeGen/

GlobalISel/

KnownBitsTest.cpp

32 lines

Diff 269844

llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp

	Show All 28 Lines

	GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)			GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
	: MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),			: MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
	DL(MF.getFunction().getParent()->getDataLayout()), MaxDepth(MaxDepth) {}			DL(MF.getFunction().getParent()->getDataLayout()), MaxDepth(MaxDepth) {}

	Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {			Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
	const MachineInstr *MI = MRI.getVRegDef(R);			const MachineInstr *MI = MRI.getVRegDef(R);
	switch (MI->getOpcode()) {			switch (MI->getOpcode()) {
				case TargetOpcode::COPY:
				return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
	case TargetOpcode::G_FRAME_INDEX: {			case TargetOpcode::G_FRAME_INDEX: {
	int FrameIdx = MI->getOperand(1).getIndex();			int FrameIdx = MI->getOperand(1).getIndex();
	return MF.getFrameInfo().getObjectAlign(FrameIdx);			return MF.getFrameInfo().getObjectAlign(FrameIdx);
	}			}
	case TargetOpcode::G_INTRINSIC:			case TargetOpcode::G_INTRINSIC:
	case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:			case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
	default:			default:
	return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);			return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
	▲ Show 20 Lines • Show All 455 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIISelLowering.h

Show First 20 Lines • Show All 402 Lines • ▼ Show 20 Lines	SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL,
SDValue V) const;		SDValue V) const;

void finalizeLowering(MachineFunction &MF) const override;		void finalizeLowering(MachineFunction &MF) const override;

void computeKnownBitsForFrameIndex(int FrameIdx,		void computeKnownBitsForFrameIndex(int FrameIdx,
KnownBits &Known,		KnownBits &Known,
const MachineFunction &MF) const override;		const MachineFunction &MF) const override;

		Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R,
		const MachineRegisterInfo &MRI,
		unsigned Depth = 0) const override;
bool isSDNodeSourceOfDivergence(const SDNode *N,		bool isSDNodeSourceOfDivergence(const SDNode *N,
FunctionLoweringInfo FLI, LegacyDivergenceAnalysis DA) const override;		FunctionLoweringInfo FLI, LegacyDivergenceAnalysis DA) const override;

bool isCanonicalized(SelectionDAG &DAG, SDValue Op,		bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
unsigned MaxDepth = 5) const;		unsigned MaxDepth = 5) const;
bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;		bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;

bool isKnownNeverNaNForTargetNode(SDValue Op,		bool isKnownNeverNaNForTargetNode(SDValue Op,
▲ Show 20 Lines • Show All 45 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show All 29 Lines
#include "llvm/ADT/StringRef.h"		#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"		#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"		#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"		#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/Analysis.h"		#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"		#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/DAGCombine.h"		#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"		#include "llvm/CodeGen/ISDOpcodes.h"
		#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"		#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"		#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"		#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"		#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"		#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"		#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"		#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"		#include "llvm/CodeGen/MachineModuleInfo.h"
▲ Show 20 Lines • Show All 11,166 Lines • ▼ Show 20 Lines	void SITargetLowering::computeKnownBitsForFrameIndex(
TargetLowering::computeKnownBitsForFrameIndex(FI, Known, MF);		TargetLowering::computeKnownBitsForFrameIndex(FI, Known, MF);

// Set the high bits to zero based on the maximum allowed scratch size per		// Set the high bits to zero based on the maximum allowed scratch size per
// wave. We can't use vaddr in MUBUF instructions if we don't know the address		// wave. We can't use vaddr in MUBUF instructions if we don't know the address
// calculation won't overflow, so assume the sign bit is never set.		// calculation won't overflow, so assume the sign bit is never set.
Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex());		Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex());
}		}

		Align SITargetLowering::computeKnownAlignForTargetInstr(
		GISelKnownBits &KB, Register R, const MachineRegisterInfo &MRI,
		unsigned Depth) const {
		const MachineInstr *MI = MRI.getVRegDef(R);
		switch (MI->getOpcode()) {
		case AMDGPU::G_INTRINSIC:
		case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
		// FIXME: Can this move to generic code? What about the case where the call
		// site specifies a lower alignment?
		Intrinsic::ID IID = MI->getIntrinsicID();
		LLVMContext &Ctx = KB.getMachineFunction().getFunction().getContext();
		AttributeList Attrs = Intrinsic::getAttributes(Ctx, IID);
		if (MaybeAlign RetAlign = Attrs.getRetAlignment())
		[Inline review comment, hliao (not done): Try `Intrinsic::getAttributes`]
		return *RetAlign;
		return Align(1);
		}
		default:
		return Align(1);
		}
		}

Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {		Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
const Align PrefAlign = TargetLowering::getPrefLoopAlignment(ML);		const Align PrefAlign = TargetLowering::getPrefLoopAlignment(ML);
const Align CacheLineAlign = Align(64);		const Align CacheLineAlign = Align(64);

// Pre-GFX10 target did not benefit from loop alignment		// Pre-GFX10 target did not benefit from loop alignment
if (!ML \|\| DisableLoopAlignment \|\|		if (!ML \|\| DisableLoopAlignment \|\|
(getSubtarget()->getGeneration() < AMDGPUSubtarget::GFX10) \|\|		(getSubtarget()->getGeneration() < AMDGPUSubtarget::GFX10) \|\|
getSubtarget()->hasInstFwdPrefetchBug())		getSubtarget()->hasInstFwdPrefetchBug())
▲ Show 20 Lines • Show All 282 Lines • Show Last 20 Lines

llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp

Show First 20 Lines • Show All 425 Lines • ▼ Show 20 Lines	TEST_F(AMDGPUGISelMITest, TestNumSignBitsTrunc) {

GISelKnownBits Info(*MF);		GISelKnownBits Info(*MF);

EXPECT_EQ(24u, Info.computeNumSignBits(CopyLoadUByte));		EXPECT_EQ(24u, Info.computeNumSignBits(CopyLoadUByte));
EXPECT_EQ(25u, Info.computeNumSignBits(CopyLoadSByte));		EXPECT_EQ(25u, Info.computeNumSignBits(CopyLoadSByte));
EXPECT_EQ(16u, Info.computeNumSignBits(CopyLoadUShort));		EXPECT_EQ(16u, Info.computeNumSignBits(CopyLoadUShort));
EXPECT_EQ(17u, Info.computeNumSignBits(CopyLoadSShort));		EXPECT_EQ(17u, Info.computeNumSignBits(CopyLoadSShort));
}		}

		TEST_F(AMDGPUGISelMITest, TestTargetKnownAlign) {
		StringRef MIRString =
		" %5:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.dispatch.ptr)\n"
		" %6:_(p4) = COPY %5\n"
		" %7:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.queue.ptr)\n"
		" %8:_(p4) = COPY %7\n"
		" %9:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)\n"
		" %10:_(p4) = COPY %9\n"
		" %11:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.implicitarg.ptr)\n"
		" %12:_(p4) = COPY %11\n"
		" %13:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.implicit.buffer.ptr)\n"
		" %14:_(p4) = COPY %13\n";

		setUp(MIRString);
		if (!TM)
		return;

		Register CopyDispatchPtr = Copies[Copies.size() - 5];
		Register CopyQueuePtr = Copies[Copies.size() - 4];
		Register CopyKernargSegmentPtr = Copies[Copies.size() - 3];
		Register CopyImplicitArgPtr = Copies[Copies.size() - 2];
		Register CopyImplicitBufferPtr = Copies[Copies.size() - 1];

		GISelKnownBits Info(*MF);

		EXPECT_EQ(Align(4), Info.computeKnownAlignment(CopyDispatchPtr));
		EXPECT_EQ(Align(4), Info.computeKnownAlignment(CopyQueuePtr));
		EXPECT_EQ(Align(4), Info.computeKnownAlignment(CopyKernargSegmentPtr));
		EXPECT_EQ(Align(4), Info.computeKnownAlignment(CopyImplicitArgPtr));
		EXPECT_EQ(Align(4), Info.computeKnownAlignment(CopyImplicitBufferPtr));
		}