Skip to content

Commit 523dab0

Browse files
author
Neil Henning
committedMar 18, 2019
[AMDGPU] Add an experimental buffer fat pointer address space.
Add an experimental buffer fat pointer address space that is currently unhandled in the backend. This commit reserves address space 7 as a non-integral pointer representing the 160-bit fat pointer (128-bit buffer descriptor + 32-bit offset) that is heavily used in graphics workloads using the AMDGPU backend. Differential Revision: https://reviews.llvm.org/D58957 llvm-svn: 356373
1 parent 6063393 commit 523dab0

9 files changed

+96
-26
lines changed
 

‎llvm/docs/AMDGPUUsage.rst

+11-3
Original file line numberDiff line numberDiff line change
@@ -281,17 +281,25 @@ LLVM Address Space number is used throughout LLVM (for example, in LLVM IR).
281281
.. table:: Address Space Mapping
282282
:name: amdgpu-address-space-mapping-table
283283

284-
================== =================
284+
================== =================================
285285
LLVM Address Space Memory Space
286-
================== =================
286+
================== =================================
287287
0 Generic (Flat)
288288
1 Global
289289
2 Region (GDS)
290290
3 Local (group/LDS)
291291
4 Constant
292292
5 Private (Scratch)
293293
6 Constant 32-bit
294-
================== =================
294+
7 Buffer Fat Pointer (experimental)
295+
================== =================================
296+
297+
The buffer fat pointer is an experimental address space that is currently
298+
unsupported in the backend. It exposes a non-integral pointer that is, in the future,
299+
intended to support the modelling of 128-bit buffer descriptors + a 32-bit
300+
offset into the buffer descriptor (in total encapsulating a 160-bit 'pointer'),
301+
allowing us to use normal LLVM load/store/atomic operations to model the buffer
302+
descriptors used heavily in graphics workloads targeting the backend.
295303

296304
.. _amdgpu-memory-scopes:
297305

‎llvm/lib/Target/AMDGPU/AMDGPU.h

+7-5
Original file line numberDiff line numberDiff line change
@@ -245,21 +245,23 @@ enum TargetIndex {
245245
namespace AMDGPUAS {
246246
enum : unsigned {
247247
// The maximum value for flat, generic, local, private, constant and region.
248-
MAX_AMDGPU_ADDRESS = 6,
248+
MAX_AMDGPU_ADDRESS = 7,
249249

250250
FLAT_ADDRESS = 0, ///< Address space for flat memory.
251251
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
252252
REGION_ADDRESS = 2, ///< Address space for region memory. (GDS)
253253

254-
CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
254+
CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2).
255255
LOCAL_ADDRESS = 3, ///< Address space for local memory.
256256
PRIVATE_ADDRESS = 5, ///< Address space for private memory.
257257

258-
CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory
258+
CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory.
259259

260-
/// Address space for direct addressible parameter memory (CONST0)
260+
BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers.
261+
262+
/// Address space for direct addressible parameter memory (CONST0).
261263
PARAM_D_ADDRESS = 6,
262-
/// Address space for indirect addressible parameter memory (VTX1)
264+
/// Address space for indirect addressible parameter memory (VTX1).
263265
PARAM_I_ADDRESS = 7,
264266

265267
// Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on

‎llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp

+12-11
Original file line numberDiff line numberDiff line change
@@ -53,20 +53,21 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
5353
AU.setPreservesAll();
5454
}
5555

56-
// These arrays are indexed by address space value enum elements 0 ... to 6
57-
static const AliasResult ASAliasRules[7][7] = {
58-
/* Flat Global Region Group Constant Private Constant 32-bit */
59-
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
60-
/* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
61-
/* Region */ {MayAlias, NoAlias , NoAlias , NoAlias, MayAlias, NoAlias , MayAlias},
62-
/* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias},
63-
/* Constant */ {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias, NoAlias , MayAlias},
64-
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
65-
/* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias}
56+
// These arrays are indexed by address space value enum elements 0 ... to 7
57+
static const AliasResult ASAliasRules[8][8] = {
58+
/* Flat Global Region Group Constant Private Constant 32-bit Buffer Fat Ptr */
59+
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
60+
/* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, MayAlias},
61+
/* Region */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias},
62+
/* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
63+
/* Constant */ {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, MayAlias},
64+
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
65+
/* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias , MayAlias},
66+
/* Buffer Fat Ptr */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, MayAlias}
6667
};
6768

6869
static AliasResult getAliasResult(unsigned AS1, unsigned AS2) {
69-
static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 6, "Addr space out of range");
70+
static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 7, "Addr space out of range");
7071

7172
if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
7273
return MayAlias;

‎llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -295,10 +295,11 @@ static StringRef computeDataLayout(const Triple &TT) {
295295
}
296296

297297
// 32-bit private, local, and region pointers. 64-bit global, constant and
298-
// flat.
298+
// flat, non-integral buffer fat pointers.
299299
return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
300300
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
301-
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
301+
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
302+
"-ni:7";
302303
}
303304

304305
LLVM_READNONE

‎llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,8 @@ unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
253253
unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
254254
if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
255255
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
256-
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
256+
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
257+
AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER) {
257258
return 512;
258259
}
259260

‎llvm/lib/Target/AMDGPU/SIISelLowering.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -1046,7 +1046,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
10461046
return isLegalGlobalAddressingMode(AM);
10471047

10481048
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
1049-
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
1049+
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
1050+
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
10501051
// If the offset isn't a multiple of 4, it probably isn't going to be
10511052
// correctly aligned.
10521053
// FIXME: Can we get the real alignment here?

‎llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll

+40
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,43 @@ define void @test_1_999(i8 addrspace(1)* %p, i8 addrspace(999)* %p1) {
5050
define void @test_999_1(i8 addrspace(999)* %p, i8 addrspace(1)* %p1) {
5151
ret void
5252
}
53+
54+
; CHECK: MayAlias: i8 addrspace(7)* %p, i8* %p1
55+
define void @test_7_0(i8 addrspace(7)* %p, i8 addrspace(0)* %p1) {
56+
ret void
57+
}
58+
59+
; CHECK: MayAlias: i8 addrspace(1)* %p1, i8 addrspace(7)* %p
60+
define void @test_7_1(i8 addrspace(7)* %p, i8 addrspace(1)* %p1) {
61+
ret void
62+
}
63+
64+
; CHECK: NoAlias: i8 addrspace(2)* %p1, i8 addrspace(7)* %p
65+
define void @test_7_2(i8 addrspace(7)* %p, i8 addrspace(2)* %p1) {
66+
ret void
67+
}
68+
69+
; CHECK: NoAlias: i8 addrspace(3)* %p1, i8 addrspace(7)* %p
70+
define void @test_7_3(i8 addrspace(7)* %p, i8 addrspace(3)* %p1) {
71+
ret void
72+
}
73+
74+
; CHECK: MayAlias: i8 addrspace(4)* %p1, i8 addrspace(7)* %p
75+
define void @test_7_4(i8 addrspace(7)* %p, i8 addrspace(4)* %p1) {
76+
ret void
77+
}
78+
79+
; CHECK: NoAlias: i8 addrspace(5)* %p1, i8 addrspace(7)* %p
80+
define void @test_7_5(i8 addrspace(7)* %p, i8 addrspace(5)* %p1) {
81+
ret void
82+
}
83+
84+
; CHECK: MayAlias: i8 addrspace(6)* %p1, i8 addrspace(7)* %p
85+
define void @test_7_6(i8 addrspace(7)* %p, i8 addrspace(6)* %p1) {
86+
ret void
87+
}
88+
89+
; CHECK: MayAlias: i8 addrspace(7)* %p, i8 addrspace(7)* %p1
90+
define void @test_7_7(i8 addrspace(7)* %p, i8 addrspace(7)* %p1) {
91+
ret void
92+
}
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
; RUN: opt -mtriple=r600-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
22

3-
; CHECK: MayAlias: i8 addrspace(5)* %p, i8 addrspace(7)* %p1
4-
5-
define amdgpu_kernel void @test(i8 addrspace(5)* %p, i8 addrspace(7)* %p1) {
3+
; CHECK: MayAlias: i8 addrspace(5)* %p, i8 addrspace(999)* %p1
4+
define amdgpu_kernel void @test(i8 addrspace(5)* %p, i8 addrspace(999)* %p1) {
65
ret void
76
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
; RUN: opt -S -mtriple=amdgcn-- -load-store-vectorizer < %s | FileCheck -check-prefix=OPT %s
2+
3+
; OPT-LABEL: @func(
4+
define void @func(i32 addrspace(7)* %out) {
5+
entry:
6+
%a0 = getelementptr i32, i32 addrspace(7)* %out, i32 0
7+
%a1 = getelementptr i32, i32 addrspace(7)* %out, i32 1
8+
%a2 = getelementptr i32, i32 addrspace(7)* %out, i32 2
9+
%a3 = getelementptr i32, i32 addrspace(7)* %out, i32 3
10+
11+
; OPT: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> addrspace(7)* %0, align 4
12+
store i32 0, i32 addrspace(7)* %a0
13+
store i32 1, i32 addrspace(7)* %a1
14+
store i32 2, i32 addrspace(7)* %a2
15+
store i32 3, i32 addrspace(7)* %a3
16+
ret void
17+
}

0 commit comments

Comments
 (0)
Please sign in to comment.