Skip to content

Commit 921a423

Browse files
committedApr 6, 2017
[AMDGPU] Translate reqd_work_group_size into amdgpu_flat_work_group_size
These two attributes specify the same information in different ways. The AMDGPU backend only checks amdgpu_flat_work_group_size, a target-specific attribute, as opposed to the language-specific reqd_work_group_size. This change produces amdgpu_flat_work_group_size out of reqd_work_group_size if specified. Differential Revision: https://reviews.llvm.org/D31728 llvm-svn: 299678
1 parent 95776ad commit 921a423

File tree

2 files changed

+20
-3
lines changed

2 files changed

+20
-3
lines changed
 

‎clang/lib/CodeGen/TargetInfo.cpp

+8-3
Original file line numberDiff line numberDiff line change
@@ -7302,9 +7302,14 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
73027302

73037303
llvm::Function *F = cast<llvm::Function>(GV);
73047304

7305-
if (const auto *Attr = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
7306-
unsigned Min = Attr->getMin();
7307-
unsigned Max = Attr->getMax();
7305+
const auto *ReqdWGS = M.getLangOpts().OpenCL ?
7306+
FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
7307+
const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
7308+
if (ReqdWGS || FlatWGS) {
7309+
unsigned Min = FlatWGS ? FlatWGS->getMin() : 0;
7310+
unsigned Max = FlatWGS ? FlatWGS->getMax() : 0;
7311+
if (ReqdWGS && Min == 0 && Max == 0)
7312+
Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
73087313

73097314
if (Min != 0) {
73107315
assert(Min <= Max && "Min must be less than or equal Max");

‎clang/test/CodeGenOpenCL/amdgpu-attrs.cl

+12
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,16 @@ kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64(
129129
// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
130130
}
131131

132+
__attribute__((reqd_work_group_size(32, 2, 1))) // expected-no-diagnostics
133+
kernel void reqd_work_group_size_32_2_1() {
134+
// CHECK: define amdgpu_kernel void @reqd_work_group_size_32_2_1() [[FLAT_WORK_GROUP_SIZE_64_64:#[0-9]+]]
135+
}
136+
__attribute__((reqd_work_group_size(32, 2, 1), amdgpu_flat_work_group_size(16, 128))) // expected-no-diagnostics
137+
kernel void reqd_work_group_size_32_2_1_flat_work_group_size_16_128() {
138+
// CHECK: define amdgpu_kernel void @reqd_work_group_size_32_2_1_flat_work_group_size_16_128() [[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]]
139+
}
140+
141+
132142
// Make sure this is silently accepted on other targets.
133143
// X86-NOT: "amdgpu-flat-work-group-size"
134144
// X86-NOT: "amdgpu-waves-per-eu"
@@ -142,6 +152,8 @@ kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64(
142152
// CHECK-NOT: "amdgpu-num-vgpr"="0"
143153

144154
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="32,64"
155+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { noinline nounwind "amdgpu-flat-work-group-size"="64,64"
156+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { noinline nounwind "amdgpu-flat-work-group-size"="16,128"
145157
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind "amdgpu-waves-per-eu"="2"
146158
// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind "amdgpu-waves-per-eu"="2,4"
147159
// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind "amdgpu-num-sgpr"="32"

0 commit comments

Comments
 (0)
Please sign in to comment.