20
20
using namespace clang ;
21
21
using namespace CodeGen ;
22
22
23
- // / \brief Get the GPU warp size.
24
- llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize (CodeGenFunction &CGF) {
23
+ namespace {
24
+ enum OpenMPRTLFunctionNVPTX {
25
+ /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
26
+ /// kmp_int32 thread_limit);
27
+ OMPRTL_NVPTX__kmpc_kernel_init,
28
+ };
29
+
30
+ // NVPTX Address space
31
+ enum AddressSpace {
32
+ AddressSpaceShared = 3 ,
33
+ };
34
+ } // namespace
35
+
36
+ /// Get the GPU warp size.
37
+ static llvm::Value *getNVPTXWarpSize (CodeGenFunction &CGF) {
25
38
CGBuilderTy &Bld = CGF.Builder ;
26
39
return Bld.CreateCall (
27
40
llvm::Intrinsic::getDeclaration (
28
- &CGM.getModule (), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
41
+ &CGF. CGM .getModule (), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
29
42
llvm::None, " nvptx_warp_size" );
30
43
}
31
44
32
- // / \brief Get the id of the current thread on the GPU.
33
- llvm::Value *CGOpenMPRuntimeNVPTX:: getNVPTXThreadID (CodeGenFunction &CGF) {
45
+ /// Get the id of the current thread on the GPU.
46
+ static llvm::Value *getNVPTXThreadID (CodeGenFunction &CGF) {
34
47
CGBuilderTy &Bld = CGF.Builder ;
35
48
return Bld.CreateCall (
36
49
llvm::Intrinsic::getDeclaration (
37
- &CGM.getModule (), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
50
+ &CGF. CGM .getModule (), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
38
51
llvm::None, " nvptx_tid" );
39
52
}
40
53
41
- // \brief Get the maximum number of threads in a block of the GPU.
42
- llvm::Value *CGOpenMPRuntimeNVPTX:: getNVPTXNumThreads (CodeGenFunction &CGF) {
54
+ /// Get the maximum number of threads in a block of the GPU.
55
+ static llvm::Value *getNVPTXNumThreads (CodeGenFunction &CGF) {
43
56
CGBuilderTy &Bld = CGF.Builder ;
44
57
return Bld.CreateCall (
45
58
llvm::Intrinsic::getDeclaration (
46
- &CGM.getModule (), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
59
+ &CGF. CGM .getModule (), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
47
60
llvm::None, " nvptx_num_threads" );
48
61
}
49
62
50
- // / \brief Get barrier to synchronize all threads in a block.
51
- void CGOpenMPRuntimeNVPTX:: getNVPTXCTABarrier (CodeGenFunction &CGF) {
63
+ /// Get barrier to synchronize all threads in a block.
64
+ static void getNVPTXCTABarrier (CodeGenFunction &CGF) {
52
65
CGBuilderTy &Bld = CGF.Builder ;
53
66
Bld.CreateCall (llvm::Intrinsic::getDeclaration (
54
- &CGM.getModule (), llvm::Intrinsic::nvvm_barrier0));
67
+ &CGF. CGM .getModule (), llvm::Intrinsic::nvvm_barrier0));
55
68
}
56
69
57
- // \brief Synchronize all GPU threads in a block.
58
- void CGOpenMPRuntimeNVPTX::syncCTAThreads (CodeGenFunction &CGF) {
59
- getNVPTXCTABarrier (CGF);
60
- }
70
+ /// Synchronize all GPU threads in a block.
71
+ static void syncCTAThreads (CodeGenFunction &CGF) { getNVPTXCTABarrier (CGF); }
61
72
62
- // / \brief Get the thread id of the OMP master thread.
73
+ /// Get the thread id of the OMP master thread.
63
74
/// The master thread id is the first thread (lane) of the last warp in the
64
75
/// GPU block. Warp size is assumed to be some power of 2.
65
76
/// Thread id is 0-indexed.
66
77
/// E.g.: If NumThreads is 33, master id is 32.
67
78
/// If NumThreads is 64, master id is 32.
68
79
/// If NumThreads is 1024, master id is 992.
69
- llvm::Value *CGOpenMPRuntimeNVPTX:: getMasterThreadID (CodeGenFunction &CGF) {
80
+ static llvm::Value *getMasterThreadID (CodeGenFunction &CGF) {
70
81
CGBuilderTy &Bld = CGF.Builder ;
71
82
llvm::Value *NumThreads = getNVPTXNumThreads (CGF);
72
83
@@ -77,19 +88,6 @@ llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
77
88
Bld.CreateNot (Mask), " master_tid" );
78
89
}
79
90
80
- namespace {
81
- enum OpenMPRTLFunctionNVPTX {
82
- // / \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
83
- // / kmp_int32 thread_limit);
84
- OMPRTL_NVPTX__kmpc_kernel_init,
85
- };
86
-
87
- // NVPTX Address space
88
- enum ADDRESS_SPACE {
89
- ADDRESS_SPACE_SHARED = 3 ,
90
- };
91
- } // namespace
92
-
93
91
CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState (
94
92
CodeGenModule &CGM)
95
93
: WorkerFn(nullptr ), CGFI(nullptr ) {
@@ -119,14 +117,14 @@ void CGOpenMPRuntimeNVPTX::initializeEnvironment() {
119
117
CGM.getModule (), CGM.Int32Ty , /* isConstant=*/ false ,
120
118
llvm::GlobalValue::CommonLinkage,
121
119
llvm::Constant::getNullValue (CGM.Int32Ty ), " __omp_num_threads" , 0 ,
122
- llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED );
120
+ llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared );
123
121
ActiveWorkers->setAlignment (DL.getPrefTypeAlignment (CGM.Int32Ty ));
124
122
125
123
WorkID = new llvm::GlobalVariable (
126
124
CGM.getModule (), CGM.Int64Ty , /* isConstant=*/ false ,
127
125
llvm::GlobalValue::CommonLinkage,
128
126
llvm::Constant::getNullValue (CGM.Int64Ty ), " __tgt_work_id" , 0 ,
129
- llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED );
127
+ llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared );
130
128
WorkID->setAlignment (DL.getPrefTypeAlignment (CGM.Int64Ty ));
131
129
}
132
130
0 commit comments