Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -12425,6 +12425,7 @@ // FIXME: Apply default attributes F->addFnAttr(llvm::Attribute::NoUnwind); + F->addFnAttr(llvm::Attribute::Convergent); Builder.CreateRetVoid(); Builder.restoreIP(IP); @@ -12476,6 +12477,7 @@ &CGF.CGM.getModule()); // FIXME: Apply default attributes F->addFnAttr(llvm::Attribute::NoUnwind); + F->addFnAttr(llvm::Attribute::Convergent); F->addFnAttr("enqueued-block"); auto IP = CGF.Builder.saveIP(); Index: clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -182,7 +182,7 @@ // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: nounwind +// CHECK: Function Attrs: convergent nounwind // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_kernel // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { // CHECK-NEXT: entry: @@ -216,7 +216,7 @@ // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: nounwind +// CHECK: Function Attrs: convergent nounwind // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { // CHECK-NEXT: entry: @@ -255,7 +255,7 @@ // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: nounwind +// CHECK: Function Attrs: convergent nounwind // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !14 { // CHECK-NEXT: entry: @@ -282,7 +282,7 @@ // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: nounwind +// CHECK: Function Attrs: convergent nounwind // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel // CHECK-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { // CHECK-NEXT: entry: @@ -297,7 +297,7 @@ // CHECK: attributes #1 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="false" } // CHECK: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } // CHECK: attributes #3 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #4 = { nounwind "enqueued-block" } +// CHECK: attributes #4 = { convergent nounwind "enqueued-block" } // CHECK: attributes #5 = { convergent } //. // CHECK: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} Index: clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl =================================================================== --- clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -315,7 +315,7 @@ }; // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. - // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) [[INVOKE_ATTR:#[0-9]+]] block_A(); // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]]. @@ -411,7 +411,7 @@ // COMMON: ret void // COMMON: } // COMMON: define spir_kernel void [[INVLK2]](i8 addrspace(4)*{{.*}}) -// COMMON: define spir_kernel void [[INVGK1]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) +// COMMON: define spir_kernel void [[INVGK1]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) [[INVOKE_ATTR:#[0-9]+]] // COMMON: define spir_kernel void [[INVGK2]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) // COMMON: define spir_kernel void [[INVGK3]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) // COMMON: define spir_kernel void [[INVGK4]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) @@ -430,3 +430,5 @@ // COMMON: define spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) // COMMON: define spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}}) // COMMON: define spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}}) + +// COMMON: attributes [[INVOKE_ATTR]] = { convergent nounwind }