This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
docs/
-
ReleaseNotes.rst
-
include/llvm/IR/
-
llvm/
-
IR/
6/6
IntrinsicsAMDGPU.td
-
lib/Target/AMDGPU/
-
Target/
-
AMDGPU/
-
AMDGPUAtomicOptimizer.cpp
-
AMDGPUISelLowering.cpp
-
AMDGPUInstCombineIntrinsic.cpp
-
AMDGPUInstructionSelector.cpp
-
AMDGPULegalizerInfo.h
13/15
AMDGPULegalizerInfo.cpp
-
AMDGPURegisterBankInfo.cpp
-
AMDGPUSearchableTables.td
1/2
SIISelLowering.h
-
SIISelLowering.cpp
-
test/
-
Analysis/UniformityAnalysis/AMDGPU/
-
UniformityAnalysis/
-
AMDGPU/
-
llvm.amdgcn.buffer.atomic.ll
-
CodeGen/
-
AMDGPU/
-
GlobalISel/
-
atomic_optimizations_mul_one.ll
-
buffer-atomic-fadd.f32-no-rtn.ll
-
buffer-atomic-fadd.f32-rtn.ll
-
buffer-atomic-fadd.f64.ll
-
buffer-atomic-fadd.v2f16-no-rtn.ll
-
buffer-atomic-fadd.v2f16-rtn.ll
-
buffer-schedule.ll
3/3
fp64-atomics-gfx90a.ll
-
llvm.amdgcn.implicit.buffer.ptr.ll
-
llvm.amdgcn.implicit.ptr.buffer.ll
-
llvm.amdgcn.raw.ptr.buffer.atomic.add.ll
-
llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll
-
llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll
-
llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
-
llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll
-
llvm.amdgcn.raw.ptr.buffer.load.format.ll
-
llvm.amdgcn.raw.ptr.buffer.load.ll
-
llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll
-
llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll
-
llvm.amdgcn.raw.ptr.buffer.store.ll
-
llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll
-
llvm.amdgcn.raw.ptr.tbuffer.load.ll
-
llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll
-
llvm.amdgcn.raw.ptr.tbuffer.store.i8.ll
-
llvm.amdgcn.raw.ptr.tbuffer.store.ll
-
llvm.amdgcn.struct.ptr.buffer.atomic.add.ll
-
llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll
-
llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll
-
llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
-
llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll
-
llvm.amdgcn.struct.ptr.buffer.load.format.ll
-
llvm.amdgcn.struct.ptr.buffer.load.ll
-
llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll
-
llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll
-
llvm.amdgcn.struct.ptr.buffer.store.ll
-
llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll
-
llvm.amdgcn.struct.ptr.tbuffer.load.ll
-
merge-buffer-stores.ll
-
regbankselect-amdgcn.raw.ptr.buffer.load.ll
-
regbankselect-amdgcn.struct.ptr.buffer.load.ll
-
regbankselect-amdgcn.struct.ptr.buffer.store.ll
-
unsupported-load.ll
-
unsupported-ptr-add.ll
-
amdgcn-load-offset-from-reg.ll
-
amdpal.ll
-
atomic-optimizer-strict-wqm.ll
-
atomic_optimizations_buffer.ll
-
atomic_optimizations_pixelshader.ll
-
atomic_optimizations_raw_buffer.ll
-
atomic_optimizations_struct_buffer.ll
-
bitcast-v4f16-v4i16.ll
-
buffer-atomic-fadd.f32-no-rtn.ll
-
buffer-atomic-fadd.f32-rtn.ll
-
buffer-atomic-fadd.f64.ll
-
buffer-atomic-fadd.v2f16-no-rtn.ll
-
buffer-atomic-fadd.v2f16-rtn.ll
-
buffer-intrinsics-mmo-offsets.ll
-
buffer-rsrc-ptr-ops.ll
-
buffer-schedule.ll
-
cc-sgpr-limit.ll
-
cc-sgpr-over-limit.ll
-
combine-add-zext-xor.ll
-
constant-address-space-32bit.ll
-
copy_to_scc.ll
-
dag-divergence-atomic.ll
-
else.ll
-
extract_subvector_vec4_vec3.ll
-
fix-wwm-vgpr-copy.ll
-
fp-min-max-buffer-ptr-atomics.ll
-
fp64-atomics-gfx90a.ll
-
fp64-min-max-buffer-ptr-atomics.ll
-
gfx90a-enc.ll
-
i1-copy-from-loop.ll
-
legalize-amdgcn.raw.ptr.buffer.load.format.f16.ll
-
legalize-amdgcn.raw.ptr.buffer.load.format.ll
-
legalize-amdgcn.raw.ptr.buffer.load.ll
-
legalize-amdgcn.raw.ptr.buffer.store.format.f16.ll
-
legalize-amdgcn.raw.ptr.buffer.store.format.f32.ll
-
legalize-amdgcn.raw.ptr.buffer.store.ll
-
legalize-amdgcn.raw.ptr.tbuffer.load.f16.ll
-
legalize-amdgcn.raw.ptr.tbuffer.load.ll
-
legalize-amdgcn.raw.ptr.tbuffer.store.f16.ll
-
legalize-amdgcn.raw.ptr.tbuffer.store.ll
-
legalize-soffset-mbuf.ll
-
llvm.amdgcn.buffer.load.dwordx3.ll
-
llvm.amdgcn.buffer.store.dwordx3.ll
-
llvm.amdgcn.exp.ll
-
llvm.amdgcn.implicit.buffer.ptr.hsa.ll
-
llvm.amdgcn.implicit.buffer.ptr.ll
-
llvm.amdgcn.implicit.ptr.buffer.hsa.ll
-
llvm.amdgcn.implicit.ptr.buffer.ll
-
llvm.amdgcn.lds.direct.load.ll
-
llvm.amdgcn.lds.param.load.ll
-
llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
-
llvm.amdgcn.raw.ptr.buffer.atomic.ll
-
llvm.amdgcn.raw.ptr.buffer.load.format.d16.ll
-
llvm.amdgcn.raw.ptr.buffer.load.format.ll
-
llvm.amdgcn.raw.ptr.buffer.load.lds.ll
-
llvm.amdgcn.raw.ptr.buffer.load.ll
-
llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
-
llvm.amdgcn.raw.ptr.buffer.store.format.ll
-
llvm.amdgcn.raw.ptr.buffer.store.ll
-
llvm.amdgcn.raw.ptr.tbuffer.load.d16.ll
-
llvm.amdgcn.raw.ptr.tbuffer.load.ll
-
llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll
-
llvm.amdgcn.raw.ptr.tbuffer.store.ll
-
llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
-
llvm.amdgcn.struct.ptr.buffer.atomic.ll
-
llvm.amdgcn.struct.ptr.buffer.load.format.d16.ll
-
llvm.amdgcn.struct.ptr.buffer.load.format.ll
-
llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll
-
llvm.amdgcn.struct.ptr.buffer.load.lds.ll
-
llvm.amdgcn.struct.ptr.buffer.load.ll
-
llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll
-
llvm.amdgcn.struct.ptr.buffer.store.format.ll
-
llvm.amdgcn.struct.ptr.buffer.store.ll
-
llvm.amdgcn.struct.ptr.tbuffer.load.d16.ll
-
llvm.amdgcn.struct.ptr.tbuffer.load.ll
-
llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll
-
llvm.amdgcn.struct.ptr.tbuffer.store.ll
-
load-local-redundant-copies.ll
-
loop_exit_with_xor.ll
-
lower-work-group-id-intrinsics.ll
-
merge-store-crash.ll
-
merge-store-usedef.ll
-
mubuf-legalize-operands-non-ptr-intrinsics.ll
-
mubuf-legalize-operands.ll
-
mubuf-shader-vgpr-non-ptr-intrinsics.ll
-
mubuf-shader-vgpr.ll
-
mubuf.ll
-
set-inactive-wwm-overwrite.ll
-
set-wave-priority.ll
-
si-annotate-cf-kill.ll
-
si-scheduler-exports.ll
-
si-triv-disjoint-mem-access.ll
-
uniform-branch-intrinsic-cond.ll
-
vgpr-descriptor-waterfall-loop-idom-update.ll
-
vgpr-spill-emergency-stack-slot.ll
-
vopc_dpp.ll
-
wait.ll
-
wave32.ll
-
wqm.ll
-
wwm-reserved-spill.ll
-
wwm-reserved.ll
-
MIR/AMDGPU/
-
AMDGPU/
-
custom-pseudo-source-values.ll
-
Transforms/
-
InstCombine/AMDGPU/
-
AMDGPU/
-
amdgcn-demanded-vector-elts-inseltpoison.ll
-
amdgcn-demanded-vector-elts.ll
-
LICM/AMDGPU/
-
AMDGPU/
-
buffer-rsrc-ptrs.ll
-
SROA/
-
sroa-common-type-fail-promotion.ll
-
StructurizeCFG/
-
rebuild-ssa-infinite-loop-inseltpoison.ll
-
rebuild-ssa-infinite-loop.ll

Differential D147547

[AMDGPU] Add buffer intrinsics that take resources as pointers
ClosedPublic

Authored by krzysz00 on Apr 4 2023, 10:21 AM.

Download Raw Diff

Details

Reviewers

arsenm
foad
nhaehnle
piotr
rampitec

Group Reviewers

Restricted Project

Commits

rGfaa2c678aa19: [AMDGPU] Add buffer intrinsics that take resources as pointers

Summary

In order to enable the LLVM frontend to better analyze buffer
operations (and to potentially enable more precise analyses on the
backend), define versions of the raw and structured buffer intrinsics
that use ptr addrspace(8) instead of <4 x i32> to represent their
rsrc arguments.

The new intrinsics are named by inserting ptr. before buffer. (for example, raw.buffer.load becomes raw.ptr.buffer.load).

One advantage to these intrinsic definitions is that, instead of
specifying that a buffer load/store will read/write some memory, we
can indicate that the memory read or written will be based on the
pointer argument. This means that, for example, a read from a
noalias buffer can be pulled out of a loop that is modifying a
distinct buffer.

In the future, we will define custom PseudoSourceValues that will
allow us to package up the (buffer, index, offset) triples that buffer
intrinsics contain and allow for more precise backend analysis.

This work also enables creating address space 7, which represents
manipulation of raw buffers using native LLVM load and store
instructions.

Where tests simply used a buffer intrinsic while testing some other
code path (such as the tests for VGPR spills), they have been updated
to use the new intrinsic form. Tests that are "about" buffer
intrinsics (for instance, those that ensure that they codegen as
expected) have been duplicated, either within existing files or into
new ones.

Depends on D145441

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

krzysz00 created this revision.Apr 4 2023, 10:21 AM

Herald added a project: Restricted Project. · View Herald TranscriptApr 4 2023, 10:21 AM

Herald added subscribers: kosarev, foad, kerbowa and 11 others. · View Herald Transcript

krzysz00 requested review of this revision.Apr 4 2023, 10:21 AM

Herald added a project: Restricted Project. · View Herald TranscriptApr 4 2023, 10:21 AM

Herald added subscribers: llvm-commits, wdng. · View Herald Transcript

krzysz00 added reviewers: arsenm, foad, nhaehnle, Restricted Project.Apr 4 2023, 10:22 AM

Herald added a subscriber: StephenFan. · View Herald TranscriptApr 4 2023, 10:22 AM

Harbormaster completed remote builds in B223613: Diff 510850.Apr 4 2023, 10:22 AM

Thank you! I could only go over the intrinsics definitions right now and they look good to me.

krzysz00 added reviewers: piotr, rampitec.Apr 6 2023, 10:08 AM

Thanks for working on this. Just added a couple of nits.

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
493	Typo trunsform.
1331	Typo haev.
llvm/lib/Target/AMDGPU/SIISelLowering.h
255–256	a addrspace -> an addrspace ? expent -> expect
llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
3	This should be testing gfx90a, not gfx940, right?

arsenm added inline comments.Apr 7 2023, 4:13 PM

llvm/include/llvm/IR/IntrinsicsAMDGPU.td
1074–1075	I'd lean towards swapping the naming around, so that you would have "int_amdgcn_raw_ptr_buffer_load". That way the ISA opcode name part remains unbroken

Rename intrinsics, fix typos

Harbormaster completed remote builds in B224818: Diff 512514.Apr 11 2023, 10:41 AM

krzysz00 added inline comments.Apr 12 2023, 4:31 PM

llvm/include/llvm/IR/IntrinsicsAMDGPU.td
1074–1075	I don't see any reason why not, done.
llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
3	The corresponding non-gisel test has both gfx90a and gfx940 - I figured I should update this one to match while I'm here.

krzysz00 added a child revision: D148184: [AMDGPU] Use resource base for buffer instruction MachineMemOperands.Apr 12 2023, 4:31 PM

Rebase

Harbormaster completed remote builds in B226103: Diff 514234.Apr 17 2023, 7:52 AM

krzysz00 added a parent revision: D145441: [AMDGPU] Define data layout entries for buffers.Apr 21 2023, 8:46 AM

krzysz00 edited the summary of this revision. (Show Details)

Rebase, requiring test updates due to some AND/$scc change

Harbormaster completed remote builds in B228139: Diff 516966.Apr 25 2023, 4:44 PM

arsenm added inline comments.May 1 2023, 1:49 PM

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
513	Can just use an std::array since this is only 4 piece case?
515–517	Can do VectorElements[I]= B.buildExtractVectorElementConstant(S32, ...).getReg(). Also we really should have a scalarize vector utility in MachineIRBuilder like the DAG does
527	auto BitCast = B.buildBitcast(ScalarTy, BitcastReg)
529	Missing observer notification?
1055–1058	Move this to the end, legal cases should be first and ordered with the most common cases first
2521–2523	This seems very special cased and I don't understand why you need specially handle vector extracts
2529	Fold register creation into the build
2565–2566	Ditto with the extract case
llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
3	Add new run line in pre-commit

Address review comments, update comments

krzysz00 added inline comments.May 2 2023, 12:36 PM

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
134	changeElementType() exists
529	Should I pass the Observer in or have it called at the call sites or? (Also, is it OK to `changingInstruction()/changedInstruction()` recursively?)
1055–1058	Having checked, the legal rule matches before the unsupported, and the matching is done in order, so this needs to come first in order to make sure buffer pointer PTR_ADD gets caught in legalization (as opposed to relying on the fact that we currently can't select it)
2521–2523	Updated the comment, and I think we need to handle them both for generality and since @nhaehnle mentioned they could come up in Vulkan

Harbormaster completed remote builds in B229509: Diff 518829.May 2 2023, 2:46 PM

Add new versions of the legalization tests that got added while I was out

Split adding gfx940 to the gisel fp64 atomics test to its own commit.

Harbormaster completed remote builds in B229845: Diff 519292.May 3 2023, 6:39 PM

krzysz00 marked 2 inline comments as done.May 17 2023, 7:59 AM

loveme00835 added a subscriber: loveme00835.May 31 2023, 4:42 PM

This should get a mention in the release notes

llvm/include/llvm/IR/IntrinsicsAMDGPU.td
1046	Comment should be updated to have ptr first
1285	Should move this with the other gfx908 intrinsics
1362	Same
1365	Same
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
159–160	Can just pass in getElementCount to LLT::vector