Add __builtin_amdgcn_read_exec.

Summary of the hunks below:
  * BuiltinsAMDGPU.def: declares the builtin (type "LUi", attributes "nc")
    under a new "Special builtins" section.
  * CGBuiltin.cpp: EmitSpecialRegisterBuiltin gains an optional SysReg
    argument; when it is empty the register name is still taken from the
    call's first argument (a string literal), as before. The new builtin
    passes "exec" with i64 register/value types and marks the resulting
    call convergent.
  * builtins-amdgcn.cl: checks the llvm.read_register.i64 call, its
    declaration, both attribute groups and the !"exec" metadata node.

Index: include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -18,6 +18,9 @@
 #   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
 
+//===----------------------------------------------------------------------===//
+// Instruction builtins.
+//===----------------------------------------------------------------------===//
 BUILTIN(__builtin_amdgcn_s_barrier, "v", "n")
 BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
 BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n")
@@ -60,6 +63,11 @@
 TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime")
 
 //===----------------------------------------------------------------------===//
+// Special builtins.
+//===----------------------------------------------------------------------===//
+BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc")
+
+//===----------------------------------------------------------------------===//
 // Legacy names with amdgpu prefix
 //===----------------------------------------------------------------------===//
 
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -3650,7 +3650,9 @@
 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
                                          const CallExpr *E,
                                          llvm::Type *RegisterType,
-                                         llvm::Type *ValueType, bool IsRead) {
+                                         llvm::Type *ValueType,
+                                         bool IsRead,
+                                         StringRef SysReg = "") {
   // write and register intrinsics only support 32 and 64 bit operations.
   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
           && "Unsupported size for register.");
@@ -3659,8 +3661,10 @@
   CodeGen::CodeGenModule &CGM = CGF.CGM;
   LLVMContext &Context = CGM.getLLVMContext();
 
-  const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
-  StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
+  if (SysReg.empty()) {
+    const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
+    SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
+  }
 
   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
@@ -7413,7 +7417,13 @@
   case AMDGPU::BI__builtin_amdgcn_classf:
     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
 
-  // Legacy amdgpu prefix
+  case AMDGPU::BI__builtin_amdgcn_read_exec: {
+    CallInst *CI = cast<CallInst>(
+      EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
+    CI->setConvergent();
+    return CI;
+  }
+  // Legacy amdgpu prefix
   case AMDGPU::BI__builtin_amdgpu_rsq:
   case AMDGPU::BI__builtin_amdgpu_rsqf: {
     if (getTarget().getTriple().getArch() == Triple::amdgcn)
Index: test/CodeGenOpenCL/builtins-amdgcn.cl
===================================================================
--- test/CodeGenOpenCL/builtins-amdgcn.cl
+++ test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -253,6 +253,14 @@
   *out = __builtin_amdgcn_cubema(a, b, c);
 }
 
+// CHECK-LABEL: @test_read_exec(
+// CHECK: call i64 @llvm.read_register.i64(metadata ![[EXEC:[0-9]+]]) #[[READ_EXEC_ATTRS:[0-9]+]]
+void test_read_exec(global ulong* out) {
+  *out = __builtin_amdgcn_read_exec();
+}
+
+// CHECK: declare i64 @llvm.read_register.i64(metadata) #[[NOUNWIND_READONLY:[0-9]+]]
+
 // Legacy intrinsics with AMDGPU prefix
 
 // CHECK-LABEL: @test_legacy_rsq_f32
@@ -282,3 +290,7 @@
 {
   *out = __builtin_amdgpu_ldexp(a, b);
 }
+
+// CHECK-DAG: attributes #[[NOUNWIND_READONLY]] = { nounwind readonly }
+// CHECK-DAG: attributes #[[READ_EXEC_ATTRS]] = { convergent }
+// CHECK: ![[EXEC]] = !{!"exec"}