This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
include/llvm/IR/
-
llvm/
-
IR/
6/6
IntrinsicsAMDGPU.td
-
lib/Target/AMDGPU/
-
Target/
-
AMDGPU/
-
AMDGPUAtomicOptimizer.cpp
-
AMDGPUISelLowering.cpp
-
AMDGPUInstCombineIntrinsic.cpp
-
AMDGPUInstructionSelector.cpp
-
AMDGPULegalizerInfo.h
13/15
AMDGPULegalizerInfo.cpp
-
AMDGPURegisterBankInfo.cpp
-
AMDGPUSearchableTables.td
1/2
SIISelLowering.h
-
SIISelLowering.cpp
-
test/
-
Analysis/UniformityAnalysis/AMDGPU/
-
UniformityAnalysis/
-
AMDGPU/
-
llvm.amdgcn.buffer.atomic.ll
-
CodeGen/
-
AMDGPU/
-
GlobalISel/
-
atomic_optimizations_mul_one.ll
-
buffer-atomic-fadd.f32-no-rtn.ll
-
buffer-atomic-fadd.f32-rtn.ll
-
buffer-atomic-fadd.f64.ll
-
buffer-atomic-fadd.v2f16-no-rtn.ll
-
buffer-atomic-fadd.v2f16-rtn.ll
-
buffer-schedule.ll
3/3
fp64-atomics-gfx90a.ll
-
llvm.amdgcn.implicit.buffer.ptr.ll
-
llvm.amdgcn.implicit.ptr.buffer.ll
-
llvm.amdgcn.raw.ptr.buffer.atomic.add.ll
-
llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll
-
llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll
-
llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
-
llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll
-
llvm.amdgcn.raw.ptr.buffer.load.format.ll
-
llvm.amdgcn.raw.ptr.buffer.load.ll
-
llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll
-
llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll
-
llvm.amdgcn.raw.ptr.buffer.store.ll
-
llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll
-
llvm.amdgcn.raw.ptr.tbuffer.load.ll
-
llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll
-
llvm.amdgcn.raw.ptr.tbuffer.store.i8.ll
-
llvm.amdgcn.raw.ptr.tbuffer.store.ll
-
llvm.amdgcn.struct.ptr.buffer.atomic.add.ll
-
llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll
-
llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll
-
llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
-
llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll
-
llvm.amdgcn.struct.ptr.buffer.load.format.ll
-
llvm.amdgcn.struct.ptr.buffer.load.ll
-
llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll
-
llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll
-
llvm.amdgcn.struct.ptr.buffer.store.ll
-
llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll
-
llvm.amdgcn.struct.ptr.tbuffer.load.ll
-
merge-buffer-stores.ll
-
regbankselect-amdgcn.raw.ptr.buffer.load.ll
-
regbankselect-amdgcn.struct.ptr.buffer.load.ll
-
regbankselect-amdgcn.struct.ptr.buffer.store.ll
-
unsupported-load.ll
-
unsupported-ptr-add.ll
-
amdgcn-load-offset-from-reg.ll
-
amdpal.ll
-
atomic-optimizer-strict-wqm.ll
-
atomic_optimizations_buffer.ll
-
atomic_optimizations_pixelshader.ll
-
atomic_optimizations_raw_buffer.ll
-
atomic_optimizations_struct_buffer.ll
-
bitcast-v4f16-v4i16.ll
-
buffer-atomic-fadd.f32-no-rtn.ll
-
buffer-atomic-fadd.f32-rtn.ll
-
buffer-atomic-fadd.f64.ll
-
buffer-atomic-fadd.v2f16-no-rtn.ll
-
buffer-atomic-fadd.v2f16-rtn.ll
-
buffer-intrinsics-mmo-offsets.ll
-
buffer-rsrc-ptr-ops.ll
-
buffer-schedule.ll
-
cc-sgpr-limit.ll
-
cc-sgpr-over-limit.ll
-
combine-add-zext-xor.ll
-
constant-address-space-32bit.ll
-
copy_to_scc.ll
-
dag-divergence-atomic.ll
-
else.ll
-
extract_subvector_vec4_vec3.ll
-
fix-wwm-vgpr-copy.ll
-
fp-min-max-buffer-ptr-atomics.ll
-
fp64-atomics-gfx90a.ll
-
fp64-min-max-buffer-ptr-atomics.ll
-
gfx90a-enc.ll
-
i1-copy-from-loop.ll
-
llvm.amdgcn.buffer.load.dwordx3.ll
-
llvm.amdgcn.buffer.store.dwordx3.ll
-
llvm.amdgcn.exp.ll
-
llvm.amdgcn.implicit.buffer.ptr.hsa.ll
-
llvm.amdgcn.implicit.buffer.ptr.ll
-
llvm.amdgcn.implicit.ptr.buffer.hsa.ll
-
llvm.amdgcn.implicit.ptr.buffer.ll
-
llvm.amdgcn.lds.direct.load.ll
-
llvm.amdgcn.lds.param.load.ll
-
llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
-
llvm.amdgcn.raw.ptr.buffer.atomic.ll
-
llvm.amdgcn.raw.ptr.buffer.load.format.d16.ll
-
llvm.amdgcn.raw.ptr.buffer.load.format.ll
-
llvm.amdgcn.raw.ptr.buffer.load.lds.ll
-
llvm.amdgcn.raw.ptr.buffer.load.ll
-
llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
-
llvm.amdgcn.raw.ptr.buffer.store.format.ll
-
llvm.amdgcn.raw.ptr.buffer.store.ll
-
llvm.amdgcn.raw.ptr.tbuffer.load.d16.ll
-
llvm.amdgcn.raw.ptr.tbuffer.load.ll
-
llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll
-
llvm.amdgcn.raw.ptr.tbuffer.store.ll
-
llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
-
llvm.amdgcn.struct.ptr.buffer.atomic.ll
-
llvm.amdgcn.struct.ptr.buffer.load.format.d16.ll
-
llvm.amdgcn.struct.ptr.buffer.load.format.ll
-
llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll
-
llvm.amdgcn.struct.ptr.buffer.load.lds.ll
-
llvm.amdgcn.struct.ptr.buffer.load.ll
-
llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll
-
llvm.amdgcn.struct.ptr.buffer.store.format.ll
-
llvm.amdgcn.struct.ptr.buffer.store.ll
-
llvm.amdgcn.struct.ptr.tbuffer.load.d16.ll
-
llvm.amdgcn.struct.ptr.tbuffer.load.ll
-
llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll
-
llvm.amdgcn.struct.ptr.tbuffer.store.ll
-
load-local-redundant-copies.ll
-
loop_exit_with_xor.ll
-
lower-work-group-id-intrinsics.ll
-
merge-store-crash.ll
-
merge-store-usedef.ll
-
mubuf-legalize-operands-non-ptr-intrinsics.ll
-
mubuf-legalize-operands.ll
-
mubuf-shader-vgpr-non-ptr-intrinsics.ll
-
mubuf-shader-vgpr.ll
-
mubuf.ll
-
set-inactive-wwm-overwrite.ll
-
set-wave-priority.ll
-
si-annotate-cf-kill.ll
-
si-scheduler-exports.ll
-
si-triv-disjoint-mem-access.ll
-
uniform-branch-intrinsic-cond.ll
-
vgpr-descriptor-waterfall-loop-idom-update.ll
-
vgpr-spill-emergency-stack-slot.ll
-
vopc_dpp.ll
-
wait.ll
-
wave32.ll
-
wqm.ll
-
wwm-reserved-spill.ll
-
wwm-reserved.ll
-
MIR/AMDGPU/
-
AMDGPU/
-
custom-pseudo-source-values.ll
-
Transforms/
-
InstCombine/AMDGPU/
-
AMDGPU/
-
amdgcn-demanded-vector-elts-inseltpoison.ll
-
amdgcn-demanded-vector-elts.ll
-
LICM/AMDGPU/
-
AMDGPU/
-
buffer-rsrc-ptrs.ll
-
SROA/
-
sroa-common-type-fail-promotion.ll
-
StructurizeCFG/
-
rebuild-ssa-infinite-loop-inseltpoison.ll
-
rebuild-ssa-infinite-loop.ll

Differential D147547

[AMDGPU] Add buffer intrinsics that take resources as pointers
ClosedPublic

Authored by krzysz00 on Apr 4 2023, 10:21 AM.

Download Raw Diff

Details

Reviewers

arsenm
foad
nhaehnle
piotr
rampitec

Group Reviewers

Restricted Project

Commits

rGfaa2c678aa19: [AMDGPU] Add buffer intrinsics that take resources as pointers

Summary

In order to enable the LLVM frontend to better analyze buffer
operations (and to potentially enable more precise analyses on the
backend), define versions of the raw and structured buffer intrinsics
that use ptr addrspace(8) instead of <4 x i32> to represent their
rsrc arguments.

The new intrinsics are named by replacing buffer. with ptr.buffer. (for example, raw.buffer.load becomes raw.ptr.buffer.load).

One advantage to these intrinsic definitions is that, instead of
specifying that a buffer load/store will read/write some memory, we
can indicate that the memory read or written will be based on the
pointer argument. This means that, for example, a read from a
noalias buffer can be pulled out of a loop that is modifying a
distinct buffer.

In the future, we will define custom PseudoSourceValues that will
allow us to package up the (buffer, index, offset) triples that buffer
intrinsics contain and allow for more precise backend analysis.

This work also enables creating address space 7, which represents
manipulation of raw buffers using native LLVM load and store
instructions.

Where tests simply used a buffer intrinsic while testing some other
code path (such as the tests for VGPR spills), they have been updated
to use the new intrinsic form. Tests that are "about" buffer
intrinsics (for instance, those that ensure that they codegen as
expected) have been duplicated, either within existing files or into
new ones.

Depends on D145441

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

krzysz00 created this revision.Apr 4 2023, 10:21 AM

Herald added a project: Restricted Project. · View Herald TranscriptApr 4 2023, 10:21 AM

Herald added subscribers: kosarev, foad, kerbowa and 11 others. · View Herald Transcript

krzysz00 requested review of this revision.Apr 4 2023, 10:21 AM

Herald added a project: Restricted Project. · View Herald TranscriptApr 4 2023, 10:21 AM

Herald added subscribers: llvm-commits, wdng. · View Herald Transcript

krzysz00 added reviewers: arsenm, foad, nhaehnle, Restricted Project.Apr 4 2023, 10:22 AM

Herald added a subscriber: StephenFan. · View Herald TranscriptApr 4 2023, 10:22 AM

Harbormaster completed remote builds in B223613: Diff 510850.Apr 4 2023, 10:22 AM

Thank you! I could only go over the intrinsics definitions right now and they look good to me.

krzysz00 added reviewers: piotr, rampitec.Apr 6 2023, 10:08 AM

Thanks for working on this. Just added a couple of nits.

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
462	Typo trunsform.
1303	Typo haev.
llvm/lib/Target/AMDGPU/SIISelLowering.h
254–255	a addrspace -> an addrspace ? expent -> expect
llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
3	This should be testing gfx90a, not gfx940, right?

arsenm added inline comments.Apr 7 2023, 4:13 PM

llvm/include/llvm/IR/IntrinsicsAMDGPU.td
1072–1073	I'd lean towards swapping the naming around, so that you would have "int_amdgcn_raw_ptr_buffer_load". That way the ISA opcode name part remains unbroken

Rename intrinsics, fix typos

Harbormaster completed remote builds in B224818: Diff 512514.Apr 11 2023, 10:41 AM

krzysz00 added inline comments.Apr 12 2023, 4:31 PM

llvm/include/llvm/IR/IntrinsicsAMDGPU.td
1072–1073	I don't see any reason why not, done.
llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
3	The corresponding non-gisel test has both gfx90a and gfx940 - I figured I should update this one to match while I'm here.

krzysz00 added a child revision: D148184: [AMDGPU] Use resource base for buffer instruction MachineMemOperands.Apr 12 2023, 4:31 PM

Rebase

Harbormaster completed remote builds in B226103: Diff 514234.Apr 17 2023, 7:52 AM

krzysz00 added a parent revision: D145441: [AMDGPU] Define data layout entries for buffers.Apr 21 2023, 8:46 AM

krzysz00 edited the summary of this revision. (Show Details)

Rebase, requiring test updates due to some AND/$scc change

Harbormaster completed remote builds in B228139: Diff 516966.Apr 25 2023, 4:44 PM

arsenm added inline comments.May 1 2023, 1:49 PM

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
482	Can just use a std::array, since this is only the 4-piece case?
484–486	Can do VectorElements[I]= B.buildExtractVectorElementConstant(S32, ...).getReg(). Also we really should have a scalarize vector utility in MachineIRBuilder like the DAG does
496	auto BitCast = B.buildBitcast(ScalarTy, BitcastReg)
498	Missing observer notification?
1027	Move this to the end, legal cases should be first and ordered with the most common cases first
2479–2481	This seems very special cased and I don't understand why you need to specially handle vector extracts
2487	Fold register creation into the build
2525–2526	Ditto with the extract case
llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
3	Add new run line in pre-commit

Address review comments, update comments

krzysz00 added inline comments.May 2 2023, 12:36 PM

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
134	changeElementType() exists
498	Should I pass the Observer in or have it called at the call sites or? (Also, is it OK to `changingInstruction()/changedInstruction()` recursively?)
1027	Having checked, the legal rule matches before the unsupported, and the matching is done in order, so this needs to come first in order to make sure buffer pointer PTR_ADD gets caught in legalization (as opposed to relying on the fact that we currently can't select it)
2479–2481	Updated the comment, and I think we need to handle them both for generality and since @nhaehnle mentioned they could come up in Vulkan

Harbormaster completed remote builds in B229509: Diff 518829.May 2 2023, 2:46 PM

Add new versions of the legalization tests that got added while I was out

Split adding gfx940 to the gisel fp64 atomics test to its own commit.

Harbormaster completed remote builds in B229845: Diff 519292.May 3 2023, 6:39 PM

krzysz00 marked 2 inline comments as done.May 17 2023, 7:59 AM

loveme00835 added a subscriber: loveme00835.May 31 2023, 4:42 PM

This should get a mention in the release notes

llvm/include/llvm/IR/IntrinsicsAMDGPU.td
1044	Comment should be updated to have ptr first
1245	Should move this with the other gfx908 intrinsics
1325	Same
1328	Same
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
137–138	Can just pass in getElementCount to LLT::vector

Add release notes items for this patch stack, fix review comments.

Herald added subscribers: s.egerton, simoncook. · View Herald TranscriptJun 1 2023, 2:26 PM

Harbormaster completed remote builds in B235991: Diff 527623.Jun 1 2023, 4:56 PM

LGTM with a nit, but please wait for Matt's approval.

I also did some extra sanity testing and verified that all your three outstanding patches are NFC for the graphics workloads.

llvm/lib/Target/AMDGPU/SIISelLowering.h
254–255	expct -> expect (no need to re-submit to phab just for that)

arsenm accepted this revision.Jun 2 2023, 12:20 PM

This revision is now accepted and ready to land.Jun 2 2023, 12:20 PM

Rebase

Hopefully this makes arc land work

This revision was landed with ongoing or failed builds.Jun 5 2023, 9:59 AM

Closed by commit rGfaa2c678aa19: [AMDGPU] Add buffer intrinsics that take resources as pointers (authored by krzysz00). · Explain Why

This revision was automatically updated to reflect the committed changes.

krzysz00 added a commit: rGfaa2c678aa19: [AMDGPU] Add buffer intrinsics that take resources as pointers.

Harbormaster completed remote builds in B236656: Diff 528471.Jun 5 2023, 10:04 AM

Large Diff

This large diff affects 152 files. Files without inline comments have been collapsed. Expand All Files

Revision Contents

Path

Size

llvm/

include/

llvm/

IR/

IntrinsicsAMDGPU.td

254 lines

lib/

Target/

AMDGPU/

AMDGPUAtomicOptimizer.cpp

18 lines

AMDGPUISelLowering.cpp

6 lines

AMDGPUInstCombineIntrinsic.cpp

8 lines

AMDGPUInstructionSelector.cpp

2 lines

AMDGPULegalizerInfo.h

5 lines

AMDGPULegalizerInfo.cpp

363 lines

AMDGPURegisterBankInfo.cpp

29 lines

AMDGPUSearchableTables.td

32 lines

SIISelLowering.h

7 lines

SIISelLowering.cpp

346 lines

test/

Analysis/

UniformityAnalysis/

AMDGPU/

llvm.amdgcn.buffer.atomic.ll

201 lines

CodeGen/

AMDGPU/

GlobalISel/

atomic_optimizations_mul_one.ll

306 lines

buffer-atomic-fadd.f32-no-rtn.ll

137 lines

buffer-atomic-fadd.f32-rtn.ll

145 lines

buffer-atomic-fadd.f64.ll

197 lines

buffer-atomic-fadd.v2f16-no-rtn.ll

137 lines

buffer-atomic-fadd.v2f16-rtn.ll

84 lines

buffer-schedule.ll

33 lines

fp64-atomics-gfx90a.ll

1122 lines

llvm.amdgcn.implicit.buffer.ptr.ll

	llvm.amdgcn.implicit.ptr.buffer.ll
	llvm.amdgcn.implicit.buffer.ptr.ll

16 lines

llvm.amdgcn.raw.ptr.buffer.atomic.add.ll

257 lines

llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll

216 lines

llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll

25 lines

llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll

483 lines

llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll

315 lines

llvm.amdgcn.raw.ptr.buffer.load.format.ll

184 lines

llvm.amdgcn.raw.ptr.buffer.load.ll

947 lines

llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll

566 lines

llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll

338 lines

llvm.amdgcn.raw.ptr.buffer.store.ll

869 lines

llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll

387 lines

llvm.amdgcn.raw.ptr.tbuffer.load.ll

232 lines

llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll

566 lines

llvm.amdgcn.raw.ptr.tbuffer.store.i8.ll

346 lines

llvm.amdgcn.raw.ptr.tbuffer.store.ll

697 lines

llvm.amdgcn.struct.ptr.buffer.atomic.add.ll

251 lines

llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll

215 lines

llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll

26 lines

llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll

514 lines

llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll

375 lines

llvm.amdgcn.struct.ptr.buffer.load.format.ll

334 lines

llvm.amdgcn.struct.ptr.buffer.load.ll

433 lines

llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll

289 lines

llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll

181 lines

llvm.amdgcn.struct.ptr.buffer.store.ll

287 lines

llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll

376 lines

llvm.amdgcn.struct.ptr.tbuffer.load.ll

219 lines

merge-buffer-stores.ll

72 lines

regbankselect-amdgcn.raw.ptr.buffer.load.ll

212 lines

regbankselect-amdgcn.struct.ptr.buffer.load.ll

208 lines

regbankselect-amdgcn.struct.ptr.buffer.store.ll

209 lines

unsupported-load.ll

9 lines

unsupported-ptr-add.ll

14 lines

amdgcn-load-offset-from-reg.ll

8 lines

amdpal.ll

4 lines

atomic-optimizer-strict-wqm.ll

4 lines

atomic_optimizations_buffer.ll

42 lines

atomic_optimizations_pixelshader.ll

16 lines

atomic_optimizations_raw_buffer.ll

36 lines

atomic_optimizations_struct_buffer.ll

44 lines

bitcast-v4f16-v4i16.ll

12 lines

buffer-atomic-fadd.f32-no-rtn.ll

185 lines

buffer-atomic-fadd.f32-rtn.ll

193 lines

buffer-atomic-fadd.f64.ll

245 lines

buffer-atomic-fadd.v2f16-no-rtn.ll

185 lines

buffer-atomic-fadd.v2f16-rtn.ll

108 lines

buffer-intrinsics-mmo-offsets.ll

475 lines

buffer-rsrc-ptr-ops.ll

113 lines

buffer-schedule.ll

34 lines

cc-sgpr-limit.ll

6 lines

cc-sgpr-over-limit.ll

2 lines

combine-add-zext-xor.ll

16 lines

constant-address-space-32bit.ll

8 lines

copy_to_scc.ll

6 lines

dag-divergence-atomic.ll

128 lines

else.ll

4 lines

extract_subvector_vec4_vec3.ll

8 lines

fix-wwm-vgpr-copy.ll

12 lines

fp-min-max-buffer-ptr-atomics.ll

703 lines

fp64-atomics-gfx90a.ll

552 lines

fp64-min-max-buffer-ptr-atomics.ll

536 lines

gfx90a-enc.ll

12 lines

i1-copy-from-loop.ll

8 lines

llvm.amdgcn.buffer.load.dwordx3.ll

34 lines

llvm.amdgcn.buffer.store.dwordx3.ll

31 lines

llvm.amdgcn.exp.ll

4 lines

llvm.amdgcn.implicit.buffer.ptr.hsa.ll

llvm.amdgcn.implicit.buffer.ptr.ll

	llvm.amdgcn.implicit.ptr.buffer.hsa.ll
	llvm.amdgcn.implicit.buffer.ptr.hsa.ll

22 lines

	llvm.amdgcn.implicit.ptr.buffer.ll
	llvm.amdgcn.implicit.buffer.ptr.ll

33 lines

llvm.amdgcn.lds.direct.load.ll

16 lines

llvm.amdgcn.lds.param.load.ll

16 lines

llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll

72 lines

llvm.amdgcn.raw.ptr.buffer.atomic.ll

126 lines

llvm.amdgcn.raw.ptr.buffer.load.format.d16.ll

54 lines

llvm.amdgcn.raw.ptr.buffer.load.format.ll

87 lines

llvm.amdgcn.raw.ptr.buffer.load.lds.ll

112 lines

llvm.amdgcn.raw.ptr.buffer.load.ll

1107 lines

llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll

81 lines

llvm.amdgcn.raw.ptr.buffer.store.format.ll

76 lines

llvm.amdgcn.raw.ptr.buffer.store.ll

326 lines

llvm.amdgcn.raw.ptr.tbuffer.load.d16.ll

142 lines

llvm.amdgcn.raw.ptr.tbuffer.load.ll

338 lines

llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll

212 lines

llvm.amdgcn.raw.ptr.tbuffer.store.ll

276 lines

llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll

61 lines

llvm.amdgcn.struct.ptr.buffer.atomic.ll

138 lines

llvm.amdgcn.struct.ptr.buffer.load.format.d16.ll

65 lines

llvm.amdgcn.struct.ptr.buffer.load.format.ll

858 lines

llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll

136 lines

llvm.amdgcn.struct.ptr.buffer.load.lds.ll

125 lines

llvm.amdgcn.struct.ptr.buffer.load.ll

292 lines

llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll

92 lines

llvm.amdgcn.struct.ptr.buffer.store.format.ll

114 lines

llvm.amdgcn.struct.ptr.buffer.store.ll

197 lines

llvm.amdgcn.struct.ptr.tbuffer.load.d16.ll

158 lines

llvm.amdgcn.struct.ptr.tbuffer.load.ll

446 lines

llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll

236 lines

llvm.amdgcn.struct.ptr.tbuffer.store.ll

499 lines

load-local-redundant-copies.ll

24 lines

loop_exit_with_xor.ll

8 lines

lower-work-group-id-intrinsics.ll

4 lines

merge-store-crash.ll

4 lines

merge-store-usedef.ll

4 lines

	mubuf-legalize-operands-non-ptr-intrinsics.ll
	mubuf-legalize-operands.ll

1 line

mubuf-legalize-operands.ll

220 lines

	mubuf-shader-vgpr-non-ptr-intrinsics.ll
	mubuf-shader-vgpr.ll

3 lines

mubuf-shader-vgpr.ll

28 lines

mubuf.ll

20 lines

set-inactive-wwm-overwrite.ll

14 lines

set-wave-priority.ll

38 lines

si-annotate-cf-kill.ll

18 lines

si-scheduler-exports.ll

4 lines

si-triv-disjoint-mem-access.ll

4 lines

uniform-branch-intrinsic-cond.ll

8 lines

vgpr-descriptor-waterfall-loop-idom-update.ll

12 lines

vgpr-spill-emergency-stack-slot.ll

8 lines

4 lines

22 lines

10 lines

608 lines

wwm-reserved-spill.ll

173 lines

wwm-reserved.ll

68 lines

MIR/

AMDGPU/

custom-pseudo-source-values.ll

16 lines

Transforms/

InstCombine/

AMDGPU/

amdgcn-demanded-vector-elts-inseltpoison.ll

2323 lines

amdgcn-demanded-vector-elts.ll

2322 lines

LICM/

AMDGPU/

buffer-rsrc-ptrs.ll

222 lines

SROA/

sroa-common-type-fail-promotion.ll

1 line

StructurizeCFG/

rebuild-ssa-infinite-loop-inseltpoison.ll

4 lines

rebuild-ssa-infinite-loop.ll

4 lines

Diff 516966

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Show First 20 Lines • Show All 982 Lines • ▼ Show 20 Lines	defm int_amdgcn_image_atomic_cmpswap :
AMDGPUImageDimAtomicX<"ATOMIC_CMPSWAP", [AMDGPUArg<LLVMMatchType<0>, "src">,		AMDGPUImageDimAtomicX<"ATOMIC_CMPSWAP", [AMDGPUArg<LLVMMatchType<0>, "src">,
AMDGPUArg<LLVMMatchType<0>, "cmp">]>;		AMDGPUArg<LLVMMatchType<0>, "cmp">]>;
}		}

//////////////////////////////////////////////////////////////////////////		//////////////////////////////////////////////////////////////////////////
// Buffer intrinsics		// Buffer intrinsics
//////////////////////////////////////////////////////////////////////////		//////////////////////////////////////////////////////////////////////////

		// Data type for buffer resources (V#). Maybe, in the future, we can create a
		// similar one for textures (T#).
		class AMDGPUBufferRsrcTy<LLVMType data_ty = llvm_any_ty>
		: LLVMQualPointerType<data_ty, 8>;

let TargetPrefix = "amdgcn" in {		let TargetPrefix = "amdgcn" in {

defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {		defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {

class AMDGPUBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <		class AMDGPUBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
[data_ty],		[data_ty],
[llvm_v4i32_ty, // rsrc(SGPR)		[llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)		llvm_i32_ty, // vindex(VGPR)
Show All 30 Lines

// New buffer intrinsics with separate raw and struct variants. The raw		// New buffer intrinsics with separate raw and struct variants. The raw
// variant never has an index. The struct variant always has an index, even if		// variant never has an index. The struct variant always has an index, even if
// it is const 0. A struct intrinsic with constant 0 index is different to the		// it is const 0. A struct intrinsic with constant 0 index is different to the
// corresponding raw intrinsic on gfx9+ because the behavior of bound checking		// corresponding raw intrinsic on gfx9+ because the behavior of bound checking
// and swizzling changes depending on whether idxen is set in the instruction.		// and swizzling changes depending on whether idxen is set in the instruction.
// These new instrinsics also keep the offset and soffset arguments separate as		// These new instrinsics also keep the offset and soffset arguments separate as
// they behave differently in bounds checking and swizzling.		// they behave differently in bounds checking and swizzling.

		// The versions of these intrinsics that take <4 x i32> arguments are deprecated
		// in favor of their .buffer.ptr variants that take ptr addrspace(8) arguments,
		arsenmUnsubmitted Done Reply Inline Actions Comment should be updated to have ptr first arsenm: Comment should be updated to have ptr first
		// which allow for improved reasoning about memory accesses.
class AMDGPURawBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <		class AMDGPURawBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
[data_ty],		[data_ty],
[llvm_v4i32_ty, // rsrc(SGPR)		[llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
// bit 1 = slc,		// bit 1 = slc,
// bit 2 = dlc on gfx10+),		// bit 2 = dlc on gfx10+),
// swizzled buffer (bit 3 = swz))		// swizzled buffer (bit 3 = swz))
[IntrReadMem, ImmArg<ArgIndex<3>>], "", [SDNPMemOperand]>,		[IntrReadMem, ImmArg<ArgIndex<3>>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;		AMDGPURsrcIntrinsic<0>;
def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad<llvm_anyfloat_ty>;		def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;		def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;

		class AMDGPURawPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
		[data_ty],
		[AMDGPUBufferRsrcTy<LLVMMatchType<0>>, // rsrc(SGPR)
		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
		// bit 1 = slc,
		// bit 2 = dlc on gfx10+),
		// swizzled buffer (bit 3 = swz))
		[IntrArgMemOnly, IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
		ImmArg<ArgIndex<3>>], "", [SDNPMemOperand]>,
		AMDGPURsrcIntrinsic<0>;
		def int_amdgcn_raw_ptr_buffer_load_format : AMDGPURawPtrBufferLoad<llvm_anyfloat_ty>;
		def int_amdgcn_raw_ptr_buffer_load : AMDGPURawPtrBufferLoad;
		arsenmUnsubmitted Done Reply Inline Actions I'd lean towards swapping the naming around, so that you would have "int_amdgcn_raw_ptr_buffer_load". That way the ISA opcode name part remains unbroken arsenm: I'd lean towards swapping the naming around, so that you would have…
		krzysz00AuthorUnsubmitted Done Reply Inline Actions I don't see any reason why not, done. krzysz00: I don't see any reason why not, done.

class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <		class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
[data_ty],		[data_ty],
[llvm_v4i32_ty, // rsrc(SGPR)		[llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)		llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
// bit 1 = slc,		// bit 1 = slc,
// bit 2 = dlc on gfx10+),		// bit 2 = dlc on gfx10+),
// swizzled buffer (bit 3 = swz))		// swizzled buffer (bit 3 = swz))
[IntrReadMem, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,		[IntrReadMem, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;		AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;		def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;		def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;

		class AMDGPUStructPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
		[data_ty],
		[AMDGPUBufferRsrcTy<LLVMMatchType<0>>, // rsrc(SGPR)
		llvm_i32_ty, // vindex(VGPR)
		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
		// bit 1 = slc,
		// bit 2 = dlc on gfx10+),
		// swizzled buffer (bit 3 = swz))
		[IntrArgMemOnly, IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
		ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
		AMDGPURsrcIntrinsic<0>;
		def int_amdgcn_struct_ptr_buffer_load_format : AMDGPUStructPtrBufferLoad;
		def int_amdgcn_struct_ptr_buffer_load : AMDGPUStructPtrBufferLoad;

class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <		class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
[],		[],
[data_ty, // vdata(VGPR)		[data_ty, // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)		llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
// bit 1 = slc,		// bit 1 = slc,
// bit 2 = dlc on gfx10+),		// bit 2 = dlc on gfx10+),
// swizzled buffer (bit 3 = swz))		// swizzled buffer (bit 3 = swz))
[IntrWriteMem, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,		[IntrWriteMem, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;		AMDGPURsrcIntrinsic<1>;
def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore<llvm_anyfloat_ty>;		def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;		def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;

		class AMDGPURawPtrBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
		[],
		[data_ty, // vdata(VGPR)
		AMDGPUBufferRsrcTy<LLVMMatchType<0>>, // rsrc(SGPR)
		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
		// bit 1 = slc,
		// bit 2 = dlc on gfx10+),
		// swizzled buffer (bit 3 = swz))
		[IntrArgMemOnly, IntrWriteMem, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
		ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
		AMDGPURsrcIntrinsic<1>;
		def int_amdgcn_raw_ptr_buffer_store_format : AMDGPURawPtrBufferStore<llvm_anyfloat_ty>;
		def int_amdgcn_raw_ptr_buffer_store : AMDGPURawPtrBufferStore;

class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <		class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
[],		[],
[data_ty, // vdata(VGPR)		[data_ty, // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)		llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)		llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
// bit 1 = slc,		// bit 1 = slc,
// bit 2 = dlc on gfx10+),		// bit 2 = dlc on gfx10+),
// swizzled buffer (bit 3 = swz))		// swizzled buffer (bit 3 = swz))
[IntrWriteMem, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,		[IntrWriteMem, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;		AMDGPURsrcIntrinsic<1>;
def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;		def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;		def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;

		// Store intrinsic class for struct-indexed buffers whose resource operand is
		// typed through AMDGPUBufferRsrcTy. Same operand list as AMDGPUStructBufferStore
		// except for the rsrc type. Produces no result.
		//   data_ty - type of the stored value (defaults to llvm_any_ty, i.e. overloaded).
		class AMDGPUStructPtrBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
		[],
		[data_ty, // vdata(VGPR)
		AMDGPUBufferRsrcTy<LLVMMatchType<0>>, // rsrc(SGPR)
		llvm_i32_ty, // vindex(VGPR)
		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
		// bit 1 = slc,
		// bit 2 = dlc on gfx10+),
		// swizzled buffer (bit 3 = swz))
		// Attributes: only argument memory is accessed and it is only written;
		// arg 1 (rsrc) is write-only and not captured; arg 5 (auxiliary data) must
		// be an immediate.
		[IntrArgMemOnly, IntrWriteMem, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
		ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
		AMDGPURsrcIntrinsic<1>;
		// Both variants use the class default data type (llvm_any_ty), matching the
		// non-pointer int_amdgcn_struct_buffer_store{,_format} definitions.
		def int_amdgcn_struct_ptr_buffer_store_format : AMDGPUStructPtrBufferStore;
		def int_amdgcn_struct_ptr_buffer_store : AMDGPUStructPtrBufferStore;

class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <		class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
!if(NoRtn, [], [data_ty]),		!if(NoRtn, [], [data_ty]),
[!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)		[!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)		llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)		llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
[ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,		[ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
Show All 21 Lines	def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)		llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
[ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,		[ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<2, 0>;		AMDGPURsrcIntrinsic<2, 0>;

// gfx908 intrinsic		// gfx908 intrinsic
def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;		def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;

		// Atomic read-modify-write intrinsic class for raw-addressed buffers whose
		// resource operand is typed through AMDGPUBufferRsrcTy.
		//   data_ty - type of the data operand (defaults to llvm_any_ty, i.e. overloaded).
		//   NoRtn   - when true the intrinsic has no result and vdata carries data_ty
		//             itself; otherwise the result is data_ty and vdata must match it.
		class AMDGPURawPtrBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
		!if(NoRtn, [], [data_ty]),
		[!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
		AMDGPUBufferRsrcTy<LLVMMatchType<0>>, // rsrc(SGPR)
		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
		// Attributes: only argument memory is accessed; arg 1 (rsrc) is not captured;
		// arg 4 (cachepolicy) must be an immediate. Derives from Intrinsic rather than
		// DefaultAttrsIntrinsic, so the remaining attributes are listed explicitly.
		[IntrArgMemOnly, NoCapture<ArgIndex<1>>,
		ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
		AMDGPURsrcIntrinsic<1, 0>;

		// One intrinsic per atomic operation. fmin/fmax overload on llvm_anyfloat_ty;
		// all others keep the class default (llvm_any_ty).
		def int_amdgcn_raw_ptr_buffer_atomic_swap : AMDGPURawPtrBufferAtomic;
		def int_amdgcn_raw_ptr_buffer_atomic_add : AMDGPURawPtrBufferAtomic;
		def int_amdgcn_raw_ptr_buffer_atomic_sub : AMDGPURawPtrBufferAtomic;
		def int_amdgcn_raw_ptr_buffer_atomic_smin : AMDGPURawPtrBufferAtomic;
		def int_amdgcn_raw_ptr_buffer_atomic_umin : AMDGPURawPtrBufferAtomic;
		def int_amdgcn_raw_ptr_buffer_atomic_fmin : AMDGPURawPtrBufferAtomic<llvm_anyfloat_ty>;
		def int_amdgcn_raw_ptr_buffer_atomic_smax : AMDGPURawPtrBufferAtomic;
		def int_amdgcn_raw_ptr_buffer_atomic_umax : AMDGPURawPtrBufferAtomic;
		def int_amdgcn_raw_ptr_buffer_atomic_fmax : AMDGPURawPtrBufferAtomic<llvm_anyfloat_ty>;
		def int_amdgcn_raw_ptr_buffer_atomic_and : AMDGPURawPtrBufferAtomic;
		def int_amdgcn_raw_ptr_buffer_atomic_or : AMDGPURawPtrBufferAtomic;
		def int_amdgcn_raw_ptr_buffer_atomic_xor : AMDGPURawPtrBufferAtomic;
		def int_amdgcn_raw_ptr_buffer_atomic_inc : AMDGPURawPtrBufferAtomic;
		def int_amdgcn_raw_ptr_buffer_atomic_dec : AMDGPURawPtrBufferAtomic;
		// Raw-addressed compare-and-swap. Defined directly rather than through
		// AMDGPURawPtrBufferAtomic because it takes two data operands (src and cmp),
		// which shifts the resource to arg 2 and the cachepolicy immediate to arg 5.
		// Result and both data operands share one overloaded integer type.
		def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic<
		[llvm_anyint_ty],
		[LLVMMatchType<0>, // src(VGPR)
		LLVMMatchType<0>, // cmp(VGPR)
		AMDGPUBufferRsrcTy<LLVMMatchType<0>>, // rsrc(SGPR)
		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
		[IntrArgMemOnly, NoCapture<ArgIndex<2>>,
		ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
		AMDGPURsrcIntrinsic<2, 0>;

		// gfx908 intrinsic
		// Raw-addressed atomic add overloaded on llvm_anyfloat_ty.
		def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic<llvm_anyfloat_ty>;
		// Review comment (arsenm, marked Done): Should move this with the other gfx908 intrinsics

class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <		class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
!if(NoRtn, [], [data_ty]),		!if(NoRtn, [], [data_ty]),
[!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)		[!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)		llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)		llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)		llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
Show All 25 Lines

// gfx908 intrinsic		// gfx908 intrinsic
def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;		def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;

// gfx90a intrinsics		// gfx90a intrinsics
def int_amdgcn_struct_buffer_atomic_fmin : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;		def int_amdgcn_struct_buffer_atomic_fmin : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
def int_amdgcn_struct_buffer_atomic_fmax : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;		def int_amdgcn_struct_buffer_atomic_fmax : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;

		// Atomic read-modify-write intrinsic class for struct-indexed buffers whose
		// resource operand is typed through AMDGPUBufferRsrcTy. Adds a vindex operand
		// relative to the raw form, so the cachepolicy immediate is arg 5.
		//   data_ty - type of the data operand (defaults to llvm_any_ty, i.e. overloaded).
		//   NoRtn   - when true the intrinsic has no result and vdata carries data_ty
		//             itself; otherwise the result is data_ty and vdata must match it.
		class AMDGPUStructPtrBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
		!if(NoRtn, [], [data_ty]),
		[!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
		AMDGPUBufferRsrcTy<LLVMMatchType<0>>, // rsrc(SGPR)
		llvm_i32_ty, // vindex(VGPR)
		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
		// Attributes: only argument memory is accessed; arg 1 (rsrc) is not captured;
		// arg 5 (cachepolicy) must be an immediate.
		[IntrArgMemOnly, NoCapture<ArgIndex<1>>,
		ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
		AMDGPURsrcIntrinsic<1, 0>;
		// One intrinsic per atomic operation, all using the class default data type.
		def int_amdgcn_struct_ptr_buffer_atomic_swap : AMDGPUStructPtrBufferAtomic;
		def int_amdgcn_struct_ptr_buffer_atomic_add : AMDGPUStructPtrBufferAtomic;
		def int_amdgcn_struct_ptr_buffer_atomic_sub : AMDGPUStructPtrBufferAtomic;
		def int_amdgcn_struct_ptr_buffer_atomic_smin : AMDGPUStructPtrBufferAtomic;
		def int_amdgcn_struct_ptr_buffer_atomic_umin : AMDGPUStructPtrBufferAtomic;
		def int_amdgcn_struct_ptr_buffer_atomic_smax : AMDGPUStructPtrBufferAtomic;
		def int_amdgcn_struct_ptr_buffer_atomic_umax : AMDGPUStructPtrBufferAtomic;
		def int_amdgcn_struct_ptr_buffer_atomic_and : AMDGPUStructPtrBufferAtomic;
		def int_amdgcn_struct_ptr_buffer_atomic_or : AMDGPUStructPtrBufferAtomic;
		def int_amdgcn_struct_ptr_buffer_atomic_xor : AMDGPUStructPtrBufferAtomic;
		def int_amdgcn_struct_ptr_buffer_atomic_inc : AMDGPUStructPtrBufferAtomic;
		def int_amdgcn_struct_ptr_buffer_atomic_dec : AMDGPUStructPtrBufferAtomic;
		// Struct-indexed compare-and-swap. Defined directly rather than through
		// AMDGPUStructPtrBufferAtomic because it takes two data operands (src and
		// cmp), which shifts the resource to arg 2 and the cachepolicy immediate to
		// arg 6. Result and both data operands share one overloaded integer type.
		def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic<
		[llvm_anyint_ty],
		[LLVMMatchType<0>, // src(VGPR)
		LLVMMatchType<0>, // cmp(VGPR)
		AMDGPUBufferRsrcTy<LLVMMatchType<0>>, // rsrc(SGPR)
		llvm_i32_ty, // vindex(VGPR)
		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
		[IntrArgMemOnly, NoCapture<ArgIndex<2>>,
		ImmArg<ArgIndex<6>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
		AMDGPURsrcIntrinsic<2, 0>;

		// gfx908 intrinsic
		// Struct-indexed atomic add overloaded on llvm_anyfloat_ty.
		def int_amdgcn_struct_ptr_buffer_atomic_fadd : AMDGPUStructPtrBufferAtomic<llvm_anyfloat_ty>;
		// Review comment (arsenm, marked Done): Same

		// gfx90a intrinsics
		// Struct-indexed atomic min overloaded on llvm_anyfloat_ty.
		def int_amdgcn_struct_ptr_buffer_atomic_fmin : AMDGPUStructPtrBufferAtomic<llvm_anyfloat_ty>;
		// Review comment (arsenm, marked Done): Same
		// Struct-indexed atomic max overloaded on llvm_anyfloat_ty.
		def int_amdgcn_struct_ptr_buffer_atomic_fmax : AMDGPUStructPtrBufferAtomic<llvm_anyfloat_ty>;

// Obsolescent tbuffer intrinsics.		// Obsolescent tbuffer intrinsics.
def int_amdgcn_tbuffer_load : DefaultAttrsIntrinsic <		def int_amdgcn_tbuffer_load : DefaultAttrsIntrinsic <
[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32		[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
[llvm_v4i32_ty, // rsrc(SGPR)		[llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)		llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // voffset(VGPR)		llvm_i32_ty, // voffset(VGPR)
llvm_i32_ty, // soffset(SGPR)		llvm_i32_ty, // soffset(SGPR)
Show All 37 Lines	def int_amdgcn_raw_tbuffer_load : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
// bit 1 = slc,		// bit 1 = slc,
// bit 2 = dlc on gfx10+),		// bit 2 = dlc on gfx10+),
// swizzled buffer (bit 3 = swz))		// swizzled buffer (bit 3 = swz))
[IntrReadMem,		[IntrReadMem,
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,		ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;		AMDGPURsrcIntrinsic<0>;

		// Raw-addressed typed-buffer load whose resource operand is typed through
		// AMDGPUBufferRsrcTy. Returns one overloaded value; args 3 (format) and
		// 4 (auxiliary data) must be immediates.
		def int_amdgcn_raw_ptr_tbuffer_load : DefaultAttrsIntrinsic <
		[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
		[AMDGPUBufferRsrcTy<LLVMMatchType<0>>, // rsrc(SGPR)
		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
		// bit 1 = slc,
		// bit 2 = dlc on gfx10+),
		// swizzled buffer (bit 3 = swz))
		// Attributes: only argument memory is accessed and it is only read;
		// arg 0 (rsrc) is read-only and not captured.
		[IntrArgMemOnly, IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
		ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
		AMDGPURsrcIntrinsic<0>;

def int_amdgcn_raw_tbuffer_store : DefaultAttrsIntrinsic <		def int_amdgcn_raw_tbuffer_store : DefaultAttrsIntrinsic <
[],		[],
[llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32		[llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
llvm_v4i32_ty, // rsrc(SGPR)		llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)		llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
// bit 1 = slc,		// bit 1 = slc,
// bit 2 = dlc on gfx10+),		// bit 2 = dlc on gfx10+),
// swizzled buffer (bit 3 = swz))		// swizzled buffer (bit 3 = swz))
[IntrWriteMem,		[IntrWriteMem,
ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,		ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;		AMDGPURsrcIntrinsic<1>;

		// Raw-addressed typed-buffer store whose resource operand is typed through
		// AMDGPUBufferRsrcTy. Produces no result; args 4 (format) and 5 (auxiliary
		// data) must be immediates.
		def int_amdgcn_raw_ptr_tbuffer_store : DefaultAttrsIntrinsic <
		[],
		[llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
		AMDGPUBufferRsrcTy<LLVMMatchType<0>>, // rsrc(SGPR)
		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
		// bit 1 = slc,
		// bit 2 = dlc on gfx10+),
		// swizzled buffer (bit 3 = swz))
		// Attributes: only argument memory is accessed and it is only written;
		// arg 1 (rsrc) is write-only and not captured.
		[IntrArgMemOnly, IntrWriteMem, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
		ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
		AMDGPURsrcIntrinsic<1>;

def int_amdgcn_struct_tbuffer_load : DefaultAttrsIntrinsic <		def int_amdgcn_struct_tbuffer_load : DefaultAttrsIntrinsic <
[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32		[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
[llvm_v4i32_ty, // rsrc(SGPR)		[llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)		llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)		llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
// bit 1 = slc,		// bit 1 = slc,
// bit 2 = dlc on gfx10+),		// bit 2 = dlc on gfx10+),
// swizzled buffer (bit 3 = swz))		// swizzled buffer (bit 3 = swz))
[IntrReadMem,		[IntrReadMem,
ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,		ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;		AMDGPURsrcIntrinsic<0>;

		// Struct-indexed typed-buffer load whose resource operand is typed through
		// AMDGPUBufferRsrcTy. Same operand list as int_amdgcn_struct_tbuffer_load
		// except for the rsrc type; args 4 (format) and 5 (auxiliary data) must be
		// immediates.
		def int_amdgcn_struct_ptr_tbuffer_load : DefaultAttrsIntrinsic <
		[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
		[AMDGPUBufferRsrcTy<LLVMMatchType<0>>, // rsrc(SGPR)
		llvm_i32_ty, // vindex(VGPR)
		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
		// bit 1 = slc,
		// bit 2 = dlc on gfx10+),
		// swizzled buffer (bit 3 = swz))
		// Attributes: only argument memory is accessed and it is only read;
		// arg 0 (rsrc) is read-only and not captured.
		[IntrArgMemOnly, IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
		ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
		AMDGPURsrcIntrinsic<0>;

		// Struct-indexed typed-buffer store whose resource operand is typed through
		// AMDGPUBufferRsrcTy. Produces no result; args 5 (format) and 6 (auxiliary
		// data) must be immediates.
		def int_amdgcn_struct_ptr_tbuffer_store : DefaultAttrsIntrinsic <
		[],
		[llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
		AMDGPUBufferRsrcTy<LLVMMatchType<0>>, // rsrc(SGPR)
		llvm_i32_ty, // vindex(VGPR)
		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
		// bit 1 = slc,
		// bit 2 = dlc on gfx10+),
		// swizzled buffer (bit 3 = swz))
		// Attributes: only argument memory is accessed and it is only written;
		// arg 1 (rsrc) is write-only and not captured.
		[IntrArgMemOnly, IntrWriteMem, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
		ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>], "", [SDNPMemOperand]>,
		AMDGPURsrcIntrinsic<1>;

def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic <		def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic <
[],		[],
[llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32		[llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
llvm_v4i32_ty, // rsrc(SGPR)		llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)		llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)		llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)		llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines	class AMDGPURawBufferLoadLDS : Intrinsic <
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
// bit 1 = slc,		// bit 1 = slc,
// bit 2 = dlc on gfx10+))		// bit 2 = dlc on gfx10+))
// swizzled buffer (bit 3 = swz))		// swizzled buffer (bit 3 = swz))
[IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>,		[IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>,
ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;		ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS;		def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS;

		// Raw-addressed buffer-to-LDS load whose resource operand is typed through
		// AMDGPUBufferRsrcTy<llvm_i8_ty>. Produces no result: the destination is the
		// address-space-3 pointer passed as arg 1.
		class AMDGPURawPtrBufferLoadLDS : Intrinsic <
		[],
		[AMDGPUBufferRsrcTy<llvm_i8_ty>, // rsrc(SGPR)
		LLVMQualPointerType<llvm_i8_ty, 3>, // LDS base offset
		llvm_i32_ty, // Data byte size: 1/2/4
		llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)
		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
		// bit 1 = slc,
		// bit 2 = dlc on gfx10+),
		// swizzled buffer (bit 3 = swz))
		// Attributes: only argument memory is accessed; arg 0 (rsrc) is read-only,
		// arg 1 (LDS pointer) is write-only, neither is captured; args 2 (data size),
		// 5 (imm offset) and 6 (auxiliary data) must be immediates.
		[IntrWillReturn, IntrArgMemOnly,
		ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
		WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
		ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>,
		ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
		def int_amdgcn_raw_ptr_buffer_load_lds : AMDGPURawPtrBufferLoadLDS;

class AMDGPUStructBufferLoadLDS : Intrinsic <		class AMDGPUStructBufferLoadLDS : Intrinsic <
[],		[],
[llvm_v4i32_ty, // rsrc(SGPR)		[llvm_v4i32_ty, // rsrc(SGPR)
LLVMQualPointerType<llvm_i8_ty, 3>, // LDS base offset		LLVMQualPointerType<llvm_i8_ty, 3>, // LDS base offset
llvm_i32_ty, // Data byte size: 1/2/4		llvm_i32_ty, // Data byte size: 1/2/4
llvm_i32_ty, // vindex(VGPR)		llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)		llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)		llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
// bit 1 = slc,		// bit 1 = slc,
// bit 2 = dlc on gfx10+))		// bit 2 = dlc on gfx10+))
// swizzled buffer (bit 3 = swz))		// swizzled buffer (bit 3 = swz))
[IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>,		[IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>,
ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;		ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS;		def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS;

		// Struct-indexed buffer-to-LDS load whose resource operand is typed through
		// AMDGPUBufferRsrcTy<llvm_i8_ty>. Same operand list as
		// AMDGPUStructBufferLoadLDS except for the rsrc type. Produces no result: the
		// destination is the address-space-3 pointer passed as arg 1.
		class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
		[],
		[AMDGPUBufferRsrcTy<llvm_i8_ty>, // rsrc(SGPR)
		LLVMQualPointerType<llvm_i8_ty, 3>, // LDS base offset
		llvm_i32_ty, // Data byte size: 1/2/4
		llvm_i32_ty, // vindex(VGPR)
		llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
		llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
		llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)
		llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
		// bit 1 = slc,
		// bit 2 = dlc on gfx10+),
		// swizzled buffer (bit 3 = swz))
		// Attributes: only argument memory is accessed; arg 0 (rsrc) is read-only,
		// arg 1 (LDS pointer) is write-only, neither is captured; args 2 (data size),
		// 6 (imm offset) and 7 (auxiliary data) must be immediates.
		[IntrWillReturn, IntrArgMemOnly,
		ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
		WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
		ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>,
		ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
		def int_amdgcn_struct_ptr_buffer_load_lds : AMDGPUStructPtrBufferLoadLDS;

} // defset AMDGPUBufferIntrinsics		} // defset AMDGPUBufferIntrinsics

// Uses that do not set the done bit should set IntrWriteMem on the		// Uses that do not set the done bit should set IntrWriteMem on the
// call site.		// call site.
def int_amdgcn_exp : DefaultAttrsIntrinsic <[], [		def int_amdgcn_exp : DefaultAttrsIntrinsic <[], [
llvm_i32_ty, // tgt,		llvm_i32_ty, // tgt,
llvm_i32_ty, // en		llvm_i32_ty, // en
llvm_any_ty, // src0 (f32 or i32)		llvm_any_ty, // src0 (f32 or i32)
▲ Show 20 Lines • Show All 1,121 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Add buffer intrinsics that take resources as pointers (Closed, Public)

Details

Diff Detail

Event Timeline

Large Diff

Revision Contents

Diff 516966

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

llvm/lib/Target/AMDGPU/SIISelLowering.h

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-schedule.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.buffer.ptr.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.ptr.buffer.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.add.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.i8.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.add.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.store.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-load.ll

llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll

llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll

llvm/test/CodeGen/AMDGPU/amdpal.ll

llvm/test/CodeGen/AMDGPU/atomic-optimizer-strict-wqm.ll

llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll

llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll

llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll

llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll

llvm/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll

llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll

llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll

llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll

llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll

llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll

llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll

llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll

llvm/test/CodeGen/AMDGPU/buffer-schedule.ll

llvm/test/CodeGen/AMDGPU/cc-sgpr-limit.ll

[AMDGPU] Add buffer intrinsics that take resources as pointers
ClosedPublic