Index: include/clang/Basic/LangOptions.h
===================================================================
--- include/clang/Basic/LangOptions.h
+++ include/clang/Basic/LangOptions.h
@@ -197,6 +197,10 @@
   bool allowsNonTrivialObjCLifetimeQualifiers() const {
     return ObjCAutoRefCount || ObjCWeak;
   }
+
+  bool assumeFunctionsAreConvergent() const {
+    return (CUDA && CUDAIsDevice) || OpenCL;
+  }
 };
 
 /// \brief Floating point control options
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -1750,13 +1750,16 @@
       FuncAttrs.addAttribute("backchain");
   }
 
-  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
-    // Conservatively, mark all functions and calls in CUDA as convergent
-    // (meaning, they may call an intrinsically convergent op, such as
-    // __syncthreads(), and so can't have certain optimizations applied around
-    // them). LLVM will remove this attribute where it safely can.
+  if (getLangOpts().assumeFunctionsAreConvergent()) {
+    // Conservatively, mark all functions and calls in CUDA and OpenCL as
+    // convergent (meaning, they may call an intrinsically convergent op, such
+    // as __syncthreads() / barrier(), and so can't have certain optimizations
+    // applied around them). LLVM will remove this attribute where it safely
+    // can.
     FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+  }
 
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
     // Exceptions aren't supported in CUDA device code.
     FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
Index: test/CodeGenOpenCL/amdgpu-attrs.cl
===================================================================
--- test/CodeGenOpenCL/amdgpu-attrs.cl
+++ test/CodeGenOpenCL/amdgpu-attrs.cl
@@ -151,28 +151,28 @@
 // CHECK-NOT: "amdgpu-num-sgpr"="0"
 // CHECK-NOT: "amdgpu-num-vgpr"="0"
 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32"
-// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64"
-
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
-
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
Index: test/CodeGenOpenCL/convergent.cl
===================================================================
--- test/CodeGenOpenCL/convergent.cl
+++ test/CodeGenOpenCL/convergent.cl
@@ -1,9 +1,19 @@
-// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck %s
+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck -enable-var-scope %s
+
+// This is initially assumed convergent, but can be deduced to not require it.
+
+// CHECK-LABEL: define spir_func void @non_convfun() local_unnamed_addr #0
+// CHECK: ret void
+__attribute__((noinline))
+void non_convfun(void) {
+  volatile int* p;
+  *p = 0;
+}
 
 void convfun(void) __attribute__((convergent));
-void non_convfun(void);
 void nodupfun(void) __attribute__((noduplicate));
 
+// External functions should be assumed convergent.
 void f(void);
 void g(void);
 
@@ -17,19 +27,23 @@
 //      non_convfun();
 //    }
 //
-// CHECK: define spir_func void @test_merge_if(i32 %[[a:.+]])
-// CHECK: %[[tobool:.+]] = icmp eq i32 %[[a]], 0
+// CHECK-LABEL: define spir_func void @test_merge_if(i32 %a) local_unnamed_addr #1 {
+// CHECK: %[[tobool:.+]] = icmp eq i32 %a, 0
 // CHECK: br i1 %[[tobool]], label %[[if_end3_critedge:.+]], label %[[if_then:.+]]
+
 // CHECK: [[if_then]]:
 // CHECK: tail call spir_func void @f()
 // CHECK: tail call spir_func void @non_convfun()
 // CHECK: tail call spir_func void @g()
+
 // CHECK: br label %[[if_end3:.+]]
+
 // CHECK: [[if_end3_critedge]]:
 // CHECK: tail call spir_func void @non_convfun()
 // CHECK: br label %[[if_end3]]
+
 // CHECK: [[if_end3]]:
-// CHECK-LABEL: ret void
+// CHECK: ret void
 
 void test_merge_if(int a) {
   if (a) {
@@ -41,13 +55,13 @@
   }
 }
 
-// CHECK-DAG: declare spir_func void @f()
-// CHECK-DAG: declare spir_func void @non_convfun()
-// CHECK-DAG: declare spir_func void @g()
+// CHECK-DAG: declare spir_func void @f() local_unnamed_addr #2
+// CHECK-DAG: declare spir_func void @g() local_unnamed_addr #2
+
 // Test two if's are not merged.
-// CHECK: define spir_func void @test_no_merge_if(i32 %[[a:.+]])
-// CHECK: %[[tobool:.+]] = icmp eq i32 %[[a]], 0
+// CHECK-LABEL: define spir_func void @test_no_merge_if(i32 %a) local_unnamed_addr #1
+// CHECK: %[[tobool:.+]] = icmp eq i32 %a, 0
 // CHECK: br i1 %[[tobool]], label %[[if_end:.+]], label %[[if_then:.+]]
 // CHECK: [[if_then]]:
 // CHECK: tail call spir_func void @f()
@@ -56,7 +70,7 @@
 // CHECK: br label %[[if_end]]
 // CHECK: [[if_end]]:
 // CHECK: %[[tobool_pr:.+]] = phi i1 [ true, %[[if_then]] ], [ false, %{{.+}} ]
-// CHECK: tail call spir_func void @convfun() #[[attr5:.+]]
+// CHECK: tail call spir_func void @convfun() #[[attr4:.+]]
 // CHECK: br i1 %[[tobool_pr]], label %[[if_then2:.+]], label %[[if_end3:.+]]
 // CHECK: [[if_then2]]:
 // CHECK: tail call spir_func void @g()
@@ -74,20 +88,20 @@
   }
 }
 
-// CHECK: declare spir_func void @convfun(){{[^#]*}} #[[attr2:[0-9]+]]
+// CHECK: declare spir_func void @convfun(){{[^#]*}} #2
 
 // Test loop is unrolled for convergent function.
-// CHECK-LABEL: define spir_func void @test_unroll()
-// CHECK: tail call spir_func void @convfun() #[[attr5:[0-9]+]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
+// CHECK-LABEL: define spir_func void @test_unroll() local_unnamed_addr #1
+// CHECK: tail call spir_func void @convfun() #[[attr4:[0-9]+]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
 // CHECK-LABEL: ret void
 
 void test_unroll() {
@@ -101,7 +115,7 @@
 // CHECK: [[for_cond_cleanup:.+]]:
 // CHECK: ret void
 // CHECK: [[for_body]]:
-// CHECK: tail call spir_func void @nodupfun() #[[attr6:[0-9]+]]
+// CHECK: tail call spir_func void @nodupfun() #[[attr5:[0-9]+]]
 // CHECK-NOT: call spir_func void @nodupfun()
 // CHECK: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
@@ -112,7 +126,9 @@
 
 // CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
 
-// CHECK-DAG: attributes #[[attr2]] = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK-DAG: attributes #[[attr3]] = { {{[^}]*}}noduplicate{{[^}]*}} }
-// CHECK-DAG: attributes #[[attr5]] = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK-DAG: attributes #[[attr6]] = { {{[^}]*}}noduplicate{{[^}]*}} }
+// CHECK: attributes #0 = { noinline norecurse nounwind "
+// CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
+// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #5 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
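For context, a minimal OpenCL sketch (not part of the patch; ext_fn and example are hypothetical names) of the hazard the conservative attribute guards against. After this change, a call to an unknown external function is emitted with the convergent attribute, since its body could execute barrier():

// illustration.cl -- hypothetical example, not part of this change.
// ext_fn has an unknown body, so it is now assumed convergent.
void ext_fn(void);

kernel void example(global int *out) {
  // Sinking ext_fn() into the branch below would make a barrier inside
  // it control-dependent on a divergent condition: some work-items would
  // reach the barrier and others would not, which is undefined behavior.
  // The convergent attribute forbids adding such control dependences;
  // LLVM strips the attribute where it can prove a function is not
  // convergent (as it does for non_convfun() in the test above).
  ext_fn();

  if (get_global_id(0) == 0)
    out[0] = 1;
}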