Index: llvm/trunk/docs/AMDGPUUsage.rst =================================================================== --- llvm/trunk/docs/AMDGPUUsage.rst +++ llvm/trunk/docs/AMDGPUUsage.rst @@ -281,9 +281,9 @@ .. table:: Address Space Mapping :name: amdgpu-address-space-mapping-table - ================== ================= + ================== ================================= LLVM Address Space Memory Space - ================== ================= + ================== ================================= 0 Generic (Flat) 1 Global 2 Region (GDS) @@ -291,7 +291,15 @@ 4 Constant 5 Private (Scratch) 6 Constant 32-bit - ================== ================= + 7 Buffer Fat Pointer (experimental) + ================== ================================= + +The buffer fat pointer is an experimental address space that is currently +unsupported in the backend. It exposes a non-integral pointer that is in future +intended to support the modelling of 128-bit buffer descriptors + a 32-bit +offset into the buffer descriptor (in total encapsulating a 160-bit 'pointer'), +allowing us to use normal LLVM load/store/atomic operations to model the buffer +descriptors used heavily in graphics workloads targeting the backend. .. _amdgpu-memory-scopes: Index: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h @@ -245,21 +245,23 @@ namespace AMDGPUAS { enum : unsigned { // The maximum value for flat, generic, local, private, constant and region. - MAX_AMDGPU_ADDRESS = 6, + MAX_AMDGPU_ADDRESS = 7, FLAT_ADDRESS = 0, ///< Address space for flat memory. GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). REGION_ADDRESS = 2, ///< Address space for region memory. (GDS) - CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2) + CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2). 
LOCAL_ADDRESS = 3, ///< Address space for local memory. PRIVATE_ADDRESS = 5, ///< Address space for private memory. - CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory + CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory. - /// Address space for direct addressible parameter memory (CONST0) + BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers. + + /// Address space for direct addressible parameter memory (CONST0). PARAM_D_ADDRESS = 6, - /// Address space for indirect addressible parameter memory (VTX1) + /// Address space for indirect addressible parameter memory (VTX1). PARAM_I_ADDRESS = 7, // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -53,20 +53,21 @@ AU.setPreservesAll(); } -// These arrays are indexed by address space value enum elements 0 ... to 6 -static const AliasResult ASAliasRules[7][7] = { - /* Flat Global Region Group Constant Private Constant 32-bit */ - /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}, - /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias}, - /* Region */ {MayAlias, NoAlias , NoAlias , NoAlias, MayAlias, NoAlias , MayAlias}, - /* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias}, - /* Constant */ {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias, NoAlias , MayAlias}, - /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias}, - /* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias} +// These arrays are indexed by address space value enum elements 0 ... 
to 7 +static const AliasResult ASAliasRules[8][8] = { + /* Flat Global Region Group Constant Private Constant 32-bit Buffer Fat Ptr */ + /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}, + /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, MayAlias}, + /* Region */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias}, + /* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias , NoAlias}, + /* Constant */ {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}, + /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , NoAlias}, + /* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias , MayAlias}, + /* Buffer Fat Ptr */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, MayAlias} }; static AliasResult getAliasResult(unsigned AS1, unsigned AS2) { - static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 6, "Addr space out of range"); + static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 7, "Addr space out of range"); if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS) return MayAlias; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -295,10 +295,11 @@ } // 32-bit private, local, and region pointers. 64-bit global, constant and - // flat. + // flat, non-integral buffer fat pointers. 
return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" - "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"; + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" + "-ni:7"; } LLVM_READNONE Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -253,7 +253,8 @@ unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS || AddrSpace == AMDGPUAS::CONSTANT_ADDRESS || - AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) { + AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT || + AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER) { return 512; } Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1046,7 +1046,8 @@ return isLegalGlobalAddressingMode(AM); if (AS == AMDGPUAS::CONSTANT_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) { + AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || + AS == AMDGPUAS::BUFFER_FAT_POINTER) { // If the offset isn't a multiple of 4, it probably isn't going to be // correctly aligned. // FIXME: Can we get the real alignment here? 
Index: llvm/trunk/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll +++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll @@ -50,3 +50,43 @@ define void @test_999_1(i8 addrspace(999)* %p, i8 addrspace(1)* %p1) { ret void } + +; CHECK: MayAlias: i8 addrspace(7)* %p, i8* %p1 +define void @test_7_0(i8 addrspace(7)* %p, i8 addrspace(0)* %p1) { + ret void +} + +; CHECK: MayAlias: i8 addrspace(1)* %p1, i8 addrspace(7)* %p +define void @test_7_1(i8 addrspace(7)* %p, i8 addrspace(1)* %p1) { + ret void +} + +; CHECK: NoAlias: i8 addrspace(2)* %p1, i8 addrspace(7)* %p +define void @test_7_2(i8 addrspace(7)* %p, i8 addrspace(2)* %p1) { + ret void +} + +; CHECK: NoAlias: i8 addrspace(3)* %p1, i8 addrspace(7)* %p +define void @test_7_3(i8 addrspace(7)* %p, i8 addrspace(3)* %p1) { + ret void +} + +; CHECK: MayAlias: i8 addrspace(4)* %p1, i8 addrspace(7)* %p +define void @test_7_4(i8 addrspace(7)* %p, i8 addrspace(4)* %p1) { + ret void +} + +; CHECK: NoAlias: i8 addrspace(5)* %p1, i8 addrspace(7)* %p +define void @test_7_5(i8 addrspace(7)* %p, i8 addrspace(5)* %p1) { + ret void +} + +; CHECK: MayAlias: i8 addrspace(6)* %p1, i8 addrspace(7)* %p +define void @test_7_6(i8 addrspace(7)* %p, i8 addrspace(6)* %p1) { + ret void +} + +; CHECK: MayAlias: i8 addrspace(7)* %p, i8 addrspace(7)* %p1 +define void @test_7_7(i8 addrspace(7)* %p, i8 addrspace(7)* %p1) { + ret void +} Index: llvm/trunk/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll +++ llvm/trunk/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll @@ -1,7 +1,6 @@ ; RUN: opt -mtriple=r600-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s -; CHECK: MayAlias: i8 addrspace(5)* %p, i8 addrspace(7)* %p1 - -define amdgpu_kernel void @test(i8 
addrspace(5)* %p, i8 addrspace(7)* %p1) { +; CHECK: MayAlias: i8 addrspace(5)* %p, i8 addrspace(999)* %p1 +define amdgpu_kernel void @test(i8 addrspace(5)* %p, i8 addrspace(999)* %p1) { ret void } Index: llvm/trunk/test/CodeGen/AMDGPU/vectorize-buffer-fat-pointer.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/vectorize-buffer-fat-pointer.ll +++ llvm/trunk/test/CodeGen/AMDGPU/vectorize-buffer-fat-pointer.ll @@ -0,0 +1,17 @@ +; RUN: opt -S -mtriple=amdgcn-- -load-store-vectorizer < %s | FileCheck -check-prefix=OPT %s + +; OPT-LABEL: @func( +define void @func(i32 addrspace(7)* %out) { +entry: + %a0 = getelementptr i32, i32 addrspace(7)* %out, i32 0 + %a1 = getelementptr i32, i32 addrspace(7)* %out, i32 1 + %a2 = getelementptr i32, i32 addrspace(7)* %out, i32 2 + %a3 = getelementptr i32, i32 addrspace(7)* %out, i32 3 + +; OPT: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> addrspace(7)* %0, align 4 + store i32 0, i32 addrspace(7)* %a0 + store i32 1, i32 addrspace(7)* %a1 + store i32 2, i32 addrspace(7)* %a2 + store i32 3, i32 addrspace(7)* %a3 + ret void +}