Index: include/clang/Basic/TargetInfo.h
===================================================================
--- include/clang/Basic/TargetInfo.h
+++ include/clang/Basic/TargetInfo.h
@@ -1056,6 +1056,15 @@
   /// \brief Get address space for OpenCL type.
   virtual LangAS getOpenCLTypeAddrSpace(const Type *T) const;
 
+  /// Query the largest OpenCL work-group size the device supports.
+  ///
+  /// \param Dim Work-group dimension being queried (callers pass 0, 1 or 2).
+  /// \returns Maximum device supported OpenCL workgroup size. This default
+  /// implementation returns 0 for every dimension.
+  virtual unsigned getOpenCLMaxWorkGroupSize(unsigned Dim) const {
+    return 0;
+  }
+
   /// \returns Target specific vtbl ptr address space.
   virtual unsigned getVtblPtrAddressSpace() const {
     return 0;
Index: lib/Basic/Targets/AMDGPU.h
===================================================================
--- lib/Basic/Targets/AMDGPU.h
+++ lib/Basic/Targets/AMDGPU.h
@@ -70,6 +70,11 @@
   bool hasLDEXPF : 1;
   const AddrSpace AS;
 
+  // The maximum supported group size is 1024, but some runtimes currently only
+  // support 256. The larger limit is reported so that the work-item id range
+  // derived from it holds under either kind of runtime.
+  unsigned MaxWorkGroupSize = 1024;
+
   static bool hasFullSpeedFMAF32(StringRef GPUName) {
     return parseAMDGCNName(GPUName) >= GK_GFX9;
   }
@@ -283,6 +288,12 @@
     return getLangASFromTargetAS(AS.Constant);
   }
 
+  /// \returns The work-group size limit stored in MaxWorkGroupSize; the same
+  /// value is reported for every dimension \p Dim.
+  unsigned getOpenCLMaxWorkGroupSize(unsigned Dim) const override {
+    return MaxWorkGroupSize;
+  }
+
   /// \returns Target specific vtbl ptr address space.
   unsigned getVtblPtrAddressSpace() const override { return AS.Constant; }
 
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -9137,22 +9137,28 @@
 
   // amdgcn workitem
   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
-    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
+    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0,
+        getContext().getTargetInfo().getOpenCLMaxWorkGroupSize(0));
   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
-    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
+    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0,
+        getContext().getTargetInfo().getOpenCLMaxWorkGroupSize(1));
   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
-    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
+    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0,
+        getContext().getTargetInfo().getOpenCLMaxWorkGroupSize(2));
 
   // r600 intrinsics
   case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
   case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
     return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
   case AMDGPU::BI__builtin_r600_read_tidig_x:
-    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
+    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0,
+        getContext().getTargetInfo().getOpenCLMaxWorkGroupSize(0));
   case AMDGPU::BI__builtin_r600_read_tidig_y:
-    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
+    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0,
+        getContext().getTargetInfo().getOpenCLMaxWorkGroupSize(1));
   case AMDGPU::BI__builtin_r600_read_tidig_z:
-    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
+    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0,
+        getContext().getTargetInfo().getOpenCLMaxWorkGroupSize(2));
   default:
     return nullptr;
   }