Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -7747,6 +7747,28 @@
   Write,
 };
 
+// Emits a read of the AMDGCN EXEC mask as a call to llvm.amdgcn.ballot with a
+// constant-true predicate instead of a read_register of "exec".
+// RegisterType is the integer type the ballot is emitted at, ValueType is the
+// type of the value the builtin returns. For exec_hi (IsExecHi) the ballot
+// must be emitted at 64 bits and shifted down: emitting it at the builtin's
+// own 32-bit type would make exec_hi generate the same IR as exec_lo.
+static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
+                                      llvm::Type *RegisterType,
+                                      llvm::Type *ValueType, bool IsExecHi) {
+  CodeGen::CGBuilderTy &Builder = CGF.Builder;
+  CodeGen::CodeGenModule &CGM = CGF.CGM;
+
+  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
+  llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+  if (!IsExecHi)
+    return Call;
+
+  // Move bits [63:32] of the mask into the low half, then narrow.
+  llvm::Value *Hi = Builder.CreateLShr(Call, 32);
+  return Builder.CreateTrunc(Hi, ValueType);
+}
+
 // Generates the IR for the read/write special register builtin,
 // ValueType is the type of the value that is to be written or read,
 // RegisterType is the type of the register being written to or read from.
@@ -17479,20 +17501,11 @@
     llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
     return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
   }
-  case AMDGPU::BI__builtin_amdgcn_read_exec: {
-    CallInst *CI = cast<CallInst>(
-      EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec"));
-    CI->setConvergent();
-    return CI;
-  }
-  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
-  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
-    StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
-      "exec_lo" : "exec_hi";
-    CallInst *CI = cast<CallInst>(
-      EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, NormalRead, RegName));
-    CI->setConvergent();
-    return CI;
-  }
+  case AMDGPU::BI__builtin_amdgcn_read_exec:
+    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
+  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
+    return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
+  case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
+    return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int32Ty, true);
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
   case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
Index: clang/test/CodeGenOpenCL/builtins-amdgcn.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -510,21 +510,25 @@
 }
 
 // CHECK-LABEL: @test_read_exec(
-// CHECK: call i64 @llvm.read_register.i64(metadata ![[$EXEC:[0-9]+]]) #[[$READ_EXEC_ATTRS:[0-9]+]]
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
 void test_read_exec(global ulong* out) {
   *out = __builtin_amdgcn_read_exec();
 }
 
-// CHECK: declare i64 @llvm.read_register.i64(metadata) #[[$NOUNWIND_READONLY:[0-9]+]]
+// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
 
 // CHECK-LABEL: @test_read_exec_lo(
-// CHECK: call i32 @llvm.read_register.i32(metadata ![[$EXEC_LO:[0-9]+]]) #[[$READ_EXEC_ATTRS]]
+// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
 void test_read_exec_lo(global uint* out) {
   *out = __builtin_amdgcn_read_exec_lo();
 }
 
+// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]]
+
 // CHECK-LABEL: @test_read_exec_hi(
-// CHECK: call i32 @llvm.read_register.i32(metadata ![[$EXEC_HI:[0-9]+]]) #[[$READ_EXEC_ATTRS]]
+// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
+// CHECK: lshr i64 [[A:%.*]], 32
+// CHECK: trunc i64 [[B:%.*]] to i32
 void test_read_exec_hi(global uint* out) {
   *out = __builtin_amdgcn_read_exec_hi();
 }
@@ -830,8 +834,4 @@
 
 // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
 // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
-// CHECK-DAG: attributes #[[$NOUNWIND_READONLY]] = { mustprogress nocallback nofree nosync nounwind willreturn memory(read) }
-// CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }
-// CHECK-DAG: ![[$EXEC]] = !{!"exec"}
-// CHECK-DAG: ![[$EXEC_LO]] = !{!"exec_lo"}
-// CHECK-DAG: ![[$EXEC_HI]] = !{!"exec_hi"}
+// CHECK-DAG: attributes #[[$NOUNWIND_READONLY]] = { convergent mustprogress nocallback nofree nounwind willreturn memory(none) }