Skip to content

Commit ccf2f73

Browse files
committed Jan 3, 2017
[OpenMP] Code cleanup for NVPTX OpenMP codegen
This patch cleans up private methods for NVPTX OpenMP codegen. It converts private members to static functions to follow the coding style of CGOpenMPRuntime.cpp and to declutter the header file.

Reviewers: ABataev

Differential Revision: https://reviews.llvm.org/D28124

llvm-svn: 290904
1 parent ada846a commit ccf2f73

File tree

2 files changed

+31
-65
lines changed

2 files changed

+31
-65
lines changed
 

Diff for: ‎clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp

+31 -33
Original file line number | Diff line number | Diff line change
@@ -20,53 +20,64 @@
2020
using namespace clang;
2121
using namespace CodeGen;
2222

23-
/// \brief Get the GPU warp size.
24-
llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize(CodeGenFunction &CGF) {
23+
namespace {
24+
enum OpenMPRTLFunctionNVPTX {
25+
/// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
26+
/// kmp_int32 thread_limit);
27+
OMPRTL_NVPTX__kmpc_kernel_init,
28+
};
29+
30+
// NVPTX Address space
31+
enum AddressSpace {
32+
AddressSpaceShared = 3,
33+
};
34+
} // namespace
35+
36+
/// Get the GPU warp size.
37+
static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
2538
CGBuilderTy &Bld = CGF.Builder;
2639
return Bld.CreateCall(
2740
llvm::Intrinsic::getDeclaration(
28-
&CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
41+
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
2942
llvm::None, "nvptx_warp_size");
3043
}
3144

32-
/// \brief Get the id of the current thread on the GPU.
33-
llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXThreadID(CodeGenFunction &CGF) {
45+
/// Get the id of the current thread on the GPU.
46+
static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
3447
CGBuilderTy &Bld = CGF.Builder;
3548
return Bld.CreateCall(
3649
llvm::Intrinsic::getDeclaration(
37-
&CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
50+
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
3851
llvm::None, "nvptx_tid");
3952
}
4053

41-
// \brief Get the maximum number of threads in a block of the GPU.
42-
llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXNumThreads(CodeGenFunction &CGF) {
54+
/// Get the maximum number of threads in a block of the GPU.
55+
static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
4356
CGBuilderTy &Bld = CGF.Builder;
4457
return Bld.CreateCall(
4558
llvm::Intrinsic::getDeclaration(
46-
&CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
59+
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
4760
llvm::None, "nvptx_num_threads");
4861
}
4962

50-
/// \brief Get barrier to synchronize all threads in a block.
51-
void CGOpenMPRuntimeNVPTX::getNVPTXCTABarrier(CodeGenFunction &CGF) {
63+
/// Get barrier to synchronize all threads in a block.
64+
static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
5265
CGBuilderTy &Bld = CGF.Builder;
5366
Bld.CreateCall(llvm::Intrinsic::getDeclaration(
54-
&CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
67+
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
5568
}
5669

57-
// \brief Synchronize all GPU threads in a block.
58-
void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
59-
getNVPTXCTABarrier(CGF);
60-
}
70+
/// Synchronize all GPU threads in a block.
71+
static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }
6172

62-
/// \brief Get the thread id of the OMP master thread.
73+
/// Get the thread id of the OMP master thread.
6374
/// The master thread id is the first thread (lane) of the last warp in the
6475
/// GPU block. Warp size is assumed to be some power of 2.
6576
/// Thread id is 0 indexed.
6677
/// E.g: If NumThreads is 33, master id is 32.
6778
/// If NumThreads is 64, master id is 32.
6879
/// If NumThreads is 1024, master id is 992.
69-
llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
80+
static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) {
7081
CGBuilderTy &Bld = CGF.Builder;
7182
llvm::Value *NumThreads = getNVPTXNumThreads(CGF);
7283

@@ -77,19 +88,6 @@ llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
7788
Bld.CreateNot(Mask), "master_tid");
7889
}
7990

80-
namespace {
81-
enum OpenMPRTLFunctionNVPTX {
82-
/// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
83-
/// kmp_int32 thread_limit);
84-
OMPRTL_NVPTX__kmpc_kernel_init,
85-
};
86-
87-
// NVPTX Address space
88-
enum ADDRESS_SPACE {
89-
ADDRESS_SPACE_SHARED = 3,
90-
};
91-
} // namespace
92-
9391
CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
9492
CodeGenModule &CGM)
9593
: WorkerFn(nullptr), CGFI(nullptr) {
@@ -119,14 +117,14 @@ void CGOpenMPRuntimeNVPTX::initializeEnvironment() {
119117
CGM.getModule(), CGM.Int32Ty, /*isConstant=*/false,
120118
llvm::GlobalValue::CommonLinkage,
121119
llvm::Constant::getNullValue(CGM.Int32Ty), "__omp_num_threads", 0,
122-
llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
120+
llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
123121
ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.Int32Ty));
124122

125123
WorkID = new llvm::GlobalVariable(
126124
CGM.getModule(), CGM.Int64Ty, /*isConstant=*/false,
127125
llvm::GlobalValue::CommonLinkage,
128126
llvm::Constant::getNullValue(CGM.Int64Ty), "__tgt_work_id", 0,
129-
llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
127+
llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
130128
WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.Int64Ty));
131129
}
132130

Diff for: ‎clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h

-32
Original file line number | Diff line number | Diff line change
@@ -49,38 +49,6 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
4949
void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
5050

5151
private:
52-
//
53-
// NVPTX calls.
54-
//
55-
56-
/// \brief Get the GPU warp size.
57-
llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF);
58-
59-
/// \brief Get the id of the current thread on the GPU.
60-
llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
61-
62-
// \brief Get the maximum number of threads in a block of the GPU.
63-
llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
64-
65-
/// \brief Get barrier to synchronize all threads in a block.
66-
void getNVPTXCTABarrier(CodeGenFunction &CGF);
67-
68-
// \brief Synchronize all GPU threads in a block.
69-
void syncCTAThreads(CodeGenFunction &CGF);
70-
71-
//
72-
// OMP calls.
73-
//
74-
75-
/// \brief Get the thread id of the OMP master thread.
76-
/// The master thread id is the first thread (lane) of the last warp in the
77-
/// GPU block. Warp size is assumed to be some power of 2.
78-
/// Thread id is 0 indexed.
79-
/// E.g: If NumThreads is 33, master id is 32.
80-
/// If NumThreads is 64, master id is 32.
81-
/// If NumThreads is 1024, master id is 992.
82-
llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
83-
8452
//
8553
// Private state and methods.
8654
//

0 commit comments

Comments
 (0)
Please sign in to comment.