Skip to content

Commit 1a14bfa

Browse files
committedMar 27, 2017
[AMDGPU] Get address space mapping by target triple environment
As we introduced the target triple environments amdgiz and amdgizcl, the address space values are no longer enums. We have to decide the values by target triple. The basic idea is to use struct AMDGPUAS to represent address space values. For address space values which do not depend on the target triple, use static const members, so that they don't occupy extra memory space and are equivalent to compile-time constants. Since the struct is lightweight and cheap, it can be created on the fly at the point of usage. Or it can be added as a member to a pass and created at the beginning of the run* function. Differential Revision: https://reviews.llvm.org/D31284 llvm-svn: 298846
1 parent f57ae33 commit 1a14bfa

39 files changed

+446
-290
lines changed
 

‎llvm/lib/Target/AMDGPU/AMDGPU.h

+39-28
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class Pass;
2323
class Target;
2424
class TargetMachine;
2525
class PassRegistry;
26+
class Module;
2627

2728
// R600 Passes
2829
FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
@@ -150,43 +151,53 @@ enum TargetIndex {
150151
/// however on the GPU, each address space points to
151152
/// a separate piece of memory that is unique from other
152153
/// memory locations.
153-
namespace AMDGPUAS {
154-
enum AddressSpaces : unsigned {
155-
PRIVATE_ADDRESS = 0, ///< Address space for private memory.
156-
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
157-
CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2)
158-
LOCAL_ADDRESS = 3, ///< Address space for local memory.
159-
FLAT_ADDRESS = 4, ///< Address space for flat memory.
160-
REGION_ADDRESS = 5, ///< Address space for region memory.
161-
PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
162-
PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
154+
struct AMDGPUAS {
155+
// The following address space values depend on the triple environment.
156+
unsigned PRIVATE_ADDRESS; ///< Address space for private memory.
157+
unsigned CONSTANT_ADDRESS; ///< Address space for constant memory (VTX2)
158+
unsigned FLAT_ADDRESS; ///< Address space for flat memory.
159+
unsigned REGION_ADDRESS; ///< Address space for region memory.
160+
161+
// The maximum address space value among flat, global, local, private, constant and region.
162+
const static unsigned MAX_COMMON_ADDRESS = 5;
163+
164+
const static unsigned GLOBAL_ADDRESS = 1; ///< Address space for global memory (RAT0, VTX0).
165+
const static unsigned LOCAL_ADDRESS = 3; ///< Address space for local memory.
166+
const static unsigned PARAM_D_ADDRESS = 6; ///< Address space for direct addressible parameter memory (CONST0)
167+
const static unsigned PARAM_I_ADDRESS = 7; ///< Address space for indirect addressible parameter memory (VTX1)
163168

164169
// Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this
165170
// order to be able to dynamically index a constant buffer, for example:
166171
//
167172
// ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
168173

169-
CONSTANT_BUFFER_0 = 8,
170-
CONSTANT_BUFFER_1 = 9,
171-
CONSTANT_BUFFER_2 = 10,
172-
CONSTANT_BUFFER_3 = 11,
173-
CONSTANT_BUFFER_4 = 12,
174-
CONSTANT_BUFFER_5 = 13,
175-
CONSTANT_BUFFER_6 = 14,
176-
CONSTANT_BUFFER_7 = 15,
177-
CONSTANT_BUFFER_8 = 16,
178-
CONSTANT_BUFFER_9 = 17,
179-
CONSTANT_BUFFER_10 = 18,
180-
CONSTANT_BUFFER_11 = 19,
181-
CONSTANT_BUFFER_12 = 20,
182-
CONSTANT_BUFFER_13 = 21,
183-
CONSTANT_BUFFER_14 = 22,
184-
CONSTANT_BUFFER_15 = 23,
174+
const static unsigned CONSTANT_BUFFER_0 = 8;
175+
const static unsigned CONSTANT_BUFFER_1 = 9;
176+
const static unsigned CONSTANT_BUFFER_2 = 10;
177+
const static unsigned CONSTANT_BUFFER_3 = 11;
178+
const static unsigned CONSTANT_BUFFER_4 = 12;
179+
const static unsigned CONSTANT_BUFFER_5 = 13;
180+
const static unsigned CONSTANT_BUFFER_6 = 14;
181+
const static unsigned CONSTANT_BUFFER_7 = 15;
182+
const static unsigned CONSTANT_BUFFER_8 = 16;
183+
const static unsigned CONSTANT_BUFFER_9 = 17;
184+
const static unsigned CONSTANT_BUFFER_10 = 18;
185+
const static unsigned CONSTANT_BUFFER_11 = 19;
186+
const static unsigned CONSTANT_BUFFER_12 = 20;
187+
const static unsigned CONSTANT_BUFFER_13 = 21;
188+
const static unsigned CONSTANT_BUFFER_14 = 22;
189+
const static unsigned CONSTANT_BUFFER_15 = 23;
185190

186191
// Some places use this if the address space can't be determined.
187-
UNKNOWN_ADDRESS_SPACE = ~0u
192+
const static unsigned UNKNOWN_ADDRESS_SPACE = ~0u;
188193
};
189194

190-
} // namespace AMDGPUAS
195+
namespace llvm {
196+
namespace AMDGPU {
197+
AMDGPUAS getAMDGPUAS(const Module &M);
198+
AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
199+
AMDGPUAS getAMDGPUAS(Triple T);
200+
} // namespace AMDGPU
201+
} // namespace llvm
191202

192203
#endif

‎llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp

+50-17
Original file line numberDiff line numberDiff line change
@@ -37,26 +37,60 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
3737
AU.setPreservesAll();
3838
}
3939

40+
// Must match the table in getAliasResult.
41+
AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_) : AS(AS_) {
42+
// These arrarys are indexed by address space value
43+
// enum elements 0 ... to 5
44+
static const AliasResult ASAliasRulesPrivIsZero[6][6] = {
45+
/* Private Global Constant Group Flat Region*/
46+
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
47+
/* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias, NoAlias},
48+
/* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias},
49+
/* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
50+
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
51+
/* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}
52+
};
53+
static const AliasResult ASAliasRulesGenIsZero[6][6] = {
54+
/* Flat Global Region Group Constant Private */
55+
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
56+
/* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
57+
/* Region */ {NoAlias , NoAlias , MayAlias, NoAlias, NoAlias , MayAlias},
58+
/* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
59+
/* Constant */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
60+
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias}
61+
};
62+
assert(AS.MAX_COMMON_ADDRESS <= 5);
63+
if (AS.FLAT_ADDRESS == 0) {
64+
assert(AS.GLOBAL_ADDRESS == 1 &&
65+
AS.REGION_ADDRESS == 2 &&
66+
AS.LOCAL_ADDRESS == 3 &&
67+
AS.CONSTANT_ADDRESS == 4 &&
68+
AS.PRIVATE_ADDRESS == 5);
69+
ASAliasRules = &ASAliasRulesGenIsZero;
70+
} else {
71+
assert(AS.PRIVATE_ADDRESS == 0 &&
72+
AS.GLOBAL_ADDRESS == 1 &&
73+
AS.CONSTANT_ADDRESS == 2 &&
74+
AS.LOCAL_ADDRESS == 3 &&
75+
AS.FLAT_ADDRESS == 4 &&
76+
AS.REGION_ADDRESS == 5);
77+
ASAliasRules = &ASAliasRulesPrivIsZero;
78+
}
79+
}
80+
81+
AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
82+
unsigned AS2) const {
83+
if (AS1 > AS.MAX_COMMON_ADDRESS || AS2 > AS.MAX_COMMON_ADDRESS)
84+
report_fatal_error("Pointer address space out of range");
85+
return (*ASAliasRules)[AS1][AS2];
86+
}
87+
4088
AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
4189
const MemoryLocation &LocB) {
42-
// This array is indexed by the AMDGPUAS::AddressSpaces
43-
// enum elements PRIVATE_ADDRESS ... to FLAT_ADDRESS
44-
// see "llvm/Transforms/AMDSPIRUtils.h"
45-
static const AliasResult ASAliasRules[5][5] = {
46-
/* Private Global Constant Group Flat */
47-
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias},
48-
/* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias},
49-
/* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
50-
/* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias},
51-
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}
52-
};
5390
unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
5491
unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
55-
if (asA > AMDGPUAS::AddressSpaces::FLAT_ADDRESS ||
56-
asB > AMDGPUAS::AddressSpaces::FLAT_ADDRESS)
57-
report_fatal_error("Pointer address space out of range");
5892

59-
AliasResult Result = ASAliasRules[asA][asB];
93+
AliasResult Result = ASAliasRules.getAliasResult(asA, asB);
6094
if (Result == NoAlias) return Result;
6195

6296
if (isa<Argument>(LocA.Ptr) && isa<Argument>(LocB.Ptr)) {
@@ -75,8 +109,7 @@ bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
75109
bool OrLocal) {
76110
const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
77111

78-
if (Base->getType()->getPointerAddressSpace() ==
79-
AMDGPUAS::AddressSpaces::CONSTANT_ADDRESS) {
112+
if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS) {
80113
return true;
81114
}
82115

‎llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h

+19-4
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
1414
#define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
1515

16+
#include "AMDGPU.h"
1617
#include "llvm/Analysis/AliasAnalysis.h"
1718
#include "llvm/IR/Function.h"
1819
#include "llvm/IR/Module.h"
@@ -25,11 +26,14 @@ class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
2526
friend AAResultBase<AMDGPUAAResult>;
2627

2728
const DataLayout &DL;
29+
AMDGPUAS AS;
2830

2931
public:
30-
explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {}
32+
explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
33+
DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS) {}
3134
AMDGPUAAResult(AMDGPUAAResult &&Arg)
32-
: AAResultBase(std::move(Arg)), DL(Arg.DL){}
35+
: AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS),
36+
ASAliasRules(Arg.ASAliasRules){}
3337

3438
/// Handle invalidation events from the new pass manager.
3539
///
@@ -42,6 +46,15 @@ class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
4246
private:
4347
bool Aliases(const MDNode *A, const MDNode *B) const;
4448
bool PathAliases(const MDNode *A, const MDNode *B) const;
49+
50+
class ASAliasRulesTy {
51+
public:
52+
ASAliasRulesTy(AMDGPUAS AS_);
53+
AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;
54+
private:
55+
AMDGPUAS AS;
56+
const AliasResult (*ASAliasRules)[6][6];
57+
} ASAliasRules;
4558
};
4659

4760
/// Analysis pass providing a never-invalidated alias analysis result.
@@ -53,7 +66,8 @@ class AMDGPUAA : public AnalysisInfoMixin<AMDGPUAA> {
5366
typedef AMDGPUAAResult Result;
5467

5568
AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) {
56-
return AMDGPUAAResult(F.getParent()->getDataLayout());
69+
return AMDGPUAAResult(F.getParent()->getDataLayout(),
70+
Triple(F.getParent()->getTargetTriple()));
5771
}
5872
};
5973

@@ -72,7 +86,8 @@ class AMDGPUAAWrapperPass : public ImmutablePass {
7286
const AMDGPUAAResult &getResult() const { return *Result; }
7387

7488
bool doInitialization(Module &M) override {
75-
Result.reset(new AMDGPUAAResult(M.getDataLayout()));
89+
Result.reset(new AMDGPUAAResult(M.getDataLayout(),
90+
Triple(M.getTargetTriple())));
7691
return false;
7792
}
7893
bool doFinalization(Module &M) override {

‎llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp

+22-15
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ namespace {
2828
class AMDGPUAnnotateKernelFeatures : public ModulePass {
2929
private:
3030
const TargetMachine *TM;
31-
static bool hasAddrSpaceCast(const Function &F);
31+
AMDGPUAS AS;
32+
static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS);
3233

3334
void addAttrToCallers(Function *Intrin, StringRef AttrName);
3435
bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
@@ -48,10 +49,11 @@ class AMDGPUAnnotateKernelFeatures : public ModulePass {
4849
ModulePass::getAnalysisUsage(AU);
4950
}
5051

51-
static bool visitConstantExpr(const ConstantExpr *CE);
52+
static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
5253
static bool visitConstantExprsRecursively(
5354
const Constant *EntryC,
54-
SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
55+
SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
56+
AMDGPUAS AS);
5557
};
5658

5759
}
@@ -65,26 +67,29 @@ INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
6567

6668

6769
// The queue ptr is only needed when casting to flat, not from it.
68-
static bool castRequiresQueuePtr(unsigned SrcAS) {
69-
return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
70+
static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
71+
return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
7072
}
7173

72-
static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
73-
return castRequiresQueuePtr(ASC->getSrcAddressSpace());
74+
static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
75+
const AMDGPUAS &AS) {
76+
return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
7477
}
7578

76-
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
79+
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
80+
AMDGPUAS AS) {
7781
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
7882
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
79-
return castRequiresQueuePtr(SrcAS);
83+
return castRequiresQueuePtr(SrcAS, AS);
8084
}
8185

8286
return false;
8387
}
8488

8589
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
8690
const Constant *EntryC,
87-
SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
91+
SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
92+
AMDGPUAS AS) {
8893

8994
if (!ConstantExprVisited.insert(EntryC).second)
9095
return false;
@@ -97,7 +102,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
97102

98103
// Check this constant expression.
99104
if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
100-
if (visitConstantExpr(CE))
105+
if (visitConstantExpr(CE, AS))
101106
return true;
102107
}
103108

@@ -118,13 +123,14 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
118123
}
119124

120125
// Return true if an addrspacecast is used that requires the queue ptr.
121-
bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
126+
bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F,
127+
AMDGPUAS AS) {
122128
SmallPtrSet<const Constant *, 8> ConstantExprVisited;
123129

124130
for (const BasicBlock &BB : F) {
125131
for (const Instruction &I : BB) {
126132
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
127-
if (castRequiresQueuePtr(ASC))
133+
if (castRequiresQueuePtr(ASC, AS))
128134
return true;
129135
}
130136

@@ -133,7 +139,7 @@ bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
133139
if (!OpC)
134140
continue;
135141

136-
if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
142+
if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS))
137143
return true;
138144
}
139145
}
@@ -173,6 +179,7 @@ bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
173179

174180
bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
175181
Triple TT(M.getTargetTriple());
182+
AS = AMDGPU::getAMDGPUAS(M);
176183

177184
static const StringRef IntrinsicToAttr[][2] = {
178185
// .x omitted
@@ -216,7 +223,7 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
216223

217224
bool HasApertureRegs =
218225
TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
219-
if (!HasApertureRegs && hasAddrSpaceCast(F))
226+
if (!HasApertureRegs && hasAddrSpaceCast(F, AS))
220227
F.addFnAttr("amdgpu-queue-ptr");
221228
}
222229
}

‎llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ class AMDGPUAnnotateUniformValues : public FunctionPass,
3737
LoopInfo *LI;
3838
DenseMap<Value*, GetElementPtrInst*> noClobberClones;
3939
bool isKernelFunc;
40+
AMDGPUAS AMDGPUASI;
4041

4142
public:
4243
static char ID;
@@ -130,8 +131,8 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
130131
Value *Ptr = I.getPointerOperand();
131132
if (!DA->isUniform(Ptr))
132133
return;
133-
auto isGlobalLoad = [](LoadInst &Load)->bool {
134-
return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
134+
auto isGlobalLoad = [&](LoadInst &Load)->bool {
135+
return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
135136
};
136137
// We're tracking up to the Function boundaries
137138
// We cannot go beyond because of FunctionPass restrictions
@@ -166,6 +167,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
166167
}
167168

168169
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
170+
AMDGPUASI = AMDGPU::getAMDGPUAS(M);
169171
return false;
170172
}
171173

0 commit comments

Comments
 (0)
Please sign in to comment.