Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -9031,7 +9031,7 @@ (M.getTriple().getOS() == llvm::Triple::AMDHSA)) F->addFnAttr("amdgpu-implicitarg-num-bytes", "56"); - if (IsHIPKernel) + if (M.getLangOpts().HIP) F->addFnAttr("uniform-work-group-size", "true"); @@ -9057,7 +9057,7 @@ F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } else assert(Max == 0 && "Max must be zero"); - } else if (IsOpenCLKernel || IsHIPKernel) { + } else { // By default, restrict the maximum size to a value specified by // --gpu-max-threads-per-block=n or its default value. std::string AttrVal = Index: clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu +++ clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu @@ -16,6 +16,10 @@ // CHECK: define amdgpu_kernel void @_Z28flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]] } +__device__ void func_flat_work_group_size_default() { +// CHECK: define void @_Z33func_flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC:#[0-9]+]] +} + __attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics __global__ void flat_work_group_size_32_64() { // CHECK: define amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]] @@ -40,7 +44,11 @@ // NAMD-NOT: "amdgpu-num-sgpr" // DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true" +// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true" + // MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" +// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024" + // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64" // CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2" // CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32" Index: clang/test/CodeGenOpenCL/amdgpu-attrs.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-attrs.cl +++ clang/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -190,5 +190,5 @@ // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" // CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} +// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" // CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56"