Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -12416,6 +12416,13 @@ CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel); F->setCallingConv(KernelCC); + // Inherit attributes from the use context. + // + // TODO: Are there any non-kernel specific attributes we need to take care to + // avoid? + llvm::AttrBuilder KernelAttrs(C, CGF.CurFn->getAttributes().getFnAttrs()); + F->addFnAttrs(KernelAttrs); + auto IP = CGF.Builder.saveIP(); auto *BB = llvm::BasicBlock::Create(C, "entry", F); auto &Builder = CGF.Builder; @@ -12424,10 +12431,6 @@ llvm::CallInst *Call = Builder.CreateCall(Invoke, Args); Call->setCallingConv(Invoke->getCallingConv()); - // FIXME: Apply default attributes - F->addFnAttr(llvm::Attribute::NoUnwind); - F->addFnAttr(llvm::Attribute::Convergent); - Builder.CreateRetVoid(); Builder.restoreIP(IP); return F; @@ -12478,10 +12481,13 @@ &CGF.CGM.getModule()); F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); - // FIXME: Apply default attributes - F->addFnAttr(llvm::Attribute::NoUnwind); - F->addFnAttr(llvm::Attribute::Convergent); - F->addFnAttr("enqueued-block"); + // Inherit attributes from the use context. + // TODO: Are there any non-kernel specific attributes we need to take care to + // avoid? + // FIXME: The invoke isn't applying the right attributes either + llvm::AttrBuilder KernelAttrs(C, CGF.CurFn->getAttributes().getFnAttrs()); + KernelAttrs.addAttribute("enqueued-block"); + F->addFnAttrs(KernelAttrs); auto IP = CGF.Builder.saveIP(); auto *BB = llvm::BasicBlock::Create(C, "entry", F); Index: clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs --prefix-filecheck-ir-name VAR -// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa -target-cpu gfx900 %s | FileCheck %s --check-prefix=CHECK +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa %s -fdenormal-fp-math-f32=preserve-sign | FileCheck %s --check-prefix=CHECK typedef struct {int a;} ndrange_t; @@ -35,9 +35,40 @@ enqueue_kernel(default_queue, flags, ndrange, block); } + +// Test that target attributes are applied to the functions inserted for the +// block. +__attribute__((target("s-memtime-inst"))) +kernel void test_target_features_kernel(global int *i) { + queue_t default_queue; + unsigned flags = 0; + ndrange_t ndrange; + + enqueue_kernel(default_queue, flags, ndrange, + ^(void) { + __builtin_amdgcn_s_memtime(); + }); +} + +__attribute__((target("s-memtime-inst"))) +void test_target_features_func(global int *i) { + queue_t default_queue; + unsigned flags = 0; + ndrange_t ndrange; + + enqueue_kernel(default_queue, flags, ndrange, + ^(void) { + __builtin_amdgcn_s_memtime(); + }); +} + +//. +// CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0 +// CHECK: @__block_literal_global.1 = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_func_block_invoke }, align 8 #0 +//. // CHECK: Function Attrs: convergent noinline norecurse nounwind optnone // CHECK-LABEL: define {{[^@]+}}@callee -// CHECK-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5) // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) @@ -53,7 +84,7 @@ // // CHECK: Function Attrs: convergent noinline norecurse nounwind optnone // CHECK-LABEL: define {{[^@]+}}@test -// CHECK-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { +// CHECK-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5) @@ -167,7 +198,7 @@ // // CHECK: Function Attrs: convergent noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke -// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -182,9 +213,9 @@ // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: convergent nounwind +// CHECK: Function Attrs: convergent norecurse // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_kernel -// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { +// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5) // CHECK-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -195,7 +226,7 @@ // // CHECK: Function Attrs: convergent noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2 -// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -216,9 +247,9 @@ // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: convergent nounwind +// CHECK: Function Attrs: convergent norecurse // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel -// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { +// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) // CHECK-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -229,7 +260,7 @@ // // CHECK: Function Attrs: convergent noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_3 -// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR4]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5) @@ -255,9 +286,9 @@ // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: convergent nounwind +// CHECK: Function Attrs: convergent norecurse // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel -// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !14 { +// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !14 { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5) // CHECK-NEXT: store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8 @@ -268,7 +299,7 @@ // // CHECK: Function Attrs: convergent noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_4 -// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -278,13 +309,13 @@ // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8 // CHECK-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 // CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8 -// CHECK-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR5:[0-9]+]] +// CHECK-NEXT: call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR11:[0-9]+]] // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: convergent nounwind +// CHECK: Function Attrs: convergent norecurse // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel -// CHECK-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { +// CHECK-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5) // CHECK-NEXT: store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 @@ -292,13 +323,101 @@ // CHECK-NEXT: call void @__test_block_invoke_4(ptr [[TMP2]]) // CHECK-NEXT: ret void // +// +// CHECK: Function Attrs: convergent noinline norecurse nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@test_target_features_kernel +// CHECK-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space !15 !kernel_arg_access_qual !8 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !10 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) +// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr addrspace(5) [[I_ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr)) +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: convergent noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke +// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: convergent norecurse +// CHECK-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel +// CHECK-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR8:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5) +// CHECK-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr +// CHECK-NEXT: call void @__test_target_features_kernel_block_invoke(ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: convergent noinline norecurse nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@test_target_features_func +// CHECK-SAME: (ptr addrspace(1) noundef [[I:%.*]]) #[[ATTR9:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5) +// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr addrspace(5) [[I_ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false) +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_target_features_func_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global.1 to ptr)) +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: convergent noinline nounwind optnone +// CHECK-LABEL: define {{[^@]+}}@__test_target_features_func_block_invoke +// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime() +// CHECK-NEXT: ret void +// +// +// CHECK: Function Attrs: convergent norecurse +// CHECK-LABEL: define {{[^@]+}}@__test_target_features_func_block_invoke_kernel +// CHECK-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR10:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5) +// CHECK-NEXT: store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr +// CHECK-NEXT: call void @__test_target_features_func_block_invoke(ptr [[TMP2]]) +// CHECK-NEXT: ret void +// //. -// CHECK: attributes #0 = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #1 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="false" } -// CHECK: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -// CHECK: attributes #3 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } -// CHECK: attributes #4 = { convergent nounwind "enqueued-block" } -// CHECK: attributes #5 = { convergent } +// CHECK: attributes #0 = { "objc_arc_inert" } +// CHECK: attributes #1 = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #2 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } +// CHECK: attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// CHECK: attributes #4 = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CHECK: attributes #5 = { convergent norecurse "denormal-fp-math-f32"="preserve-sign,preserve-sign" "enqueued-block" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } +// CHECK: attributes #6 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" } +// CHECK: attributes #7 = { nocallback nofree nosync nounwind willreturn } +// CHECK: attributes #8 = { convergent norecurse "denormal-fp-math-f32"="preserve-sign,preserve-sign" "enqueued-block" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" } +// CHECK: attributes #9 = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" } +// CHECK: attributes #10 = { convergent norecurse "denormal-fp-math-f32"="preserve-sign,preserve-sign" "enqueued-block" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" } +// CHECK: attributes #11 = { convergent } //. // CHECK: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400} // CHECK: !1 = !{i32 1, !"wchar_size", i32 4} @@ -315,4 +434,6 @@ // CHECK: !12 = !{!"none", !"none"} // CHECK: !13 = !{!"__block_literal", !"void*"} // CHECK: !14 = !{!"", !""} +// CHECK: !15 = !{i32 1} +// CHECK: !16 = !{!"int*"} //. Index: clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl @@ -0,0 +1,102 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs +// RUN: %clang_cc1 -fno-ident -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" -fdenormal-fp-math-f32=preserve-sign -cl-uniform-work-group-size | FileCheck --check-prefix=SPIR32 %s + +// Test that attributes are correctly applied to functions introduced for +// enqueued blocks + + +typedef void (^bl_t)(local void *); +typedef struct {int a;} ndrange_t; + +kernel void device_side_enqueue(global int *a, global int *b, int i) { + queue_t default_queue; + unsigned flags = 0; + ndrange_t ndrange; + + enqueue_kernel(default_queue, flags, ndrange, + ^(void) { + a[i] = b[i]; + }); +} +// SPIR32: Function Attrs: convergent noinline norecurse nounwind optnone +// SPIR32-LABEL: define {{[^@]+}}@device_side_enqueue +// SPIR32-SAME: (ptr addrspace(1) align 4 [[A:%.*]], ptr addrspace(1) align 4 [[B:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { +// SPIR32-NEXT: entry: +// SPIR32-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR32-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// SPIR32-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 +// SPIR32-NEXT: [[DEFAULT_QUEUE:%.*]] = alloca ptr, align 4 +// SPIR32-NEXT: [[FLAGS:%.*]] = alloca i32, align 4 +// SPIR32-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4 +// SPIR32-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4 +// SPIR32-NEXT: [[BLOCK:%.*]] = alloca <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, align 4 +// SPIR32-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 4 +// SPIR32-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 4 +// SPIR32-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 +// SPIR32-NEXT: store i32 0, ptr [[FLAGS]], align 4 +// SPIR32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEFAULT_QUEUE]], align 4 +// SPIR32-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS]], align 4 +// SPIR32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP]], ptr align 4 [[NDRANGE]], i32 4, i1 false) +// SPIR32-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr [[BLOCK]], i32 0, i32 0 +// SPIR32-NEXT: store i32 24, ptr [[BLOCK_SIZE]], align 4 +// SPIR32-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr [[BLOCK]], i32 0, i32 1 +// SPIR32-NEXT: store i32 4, ptr [[BLOCK_ALIGN]], align 4 +// SPIR32-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr [[BLOCK]], i32 0, i32 2 +// SPIR32-NEXT: store ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke to ptr addrspace(4)), ptr [[BLOCK_INVOKE]], align 4 +// SPIR32-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr [[BLOCK]], i32 0, i32 3 +// SPIR32-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 4 +// SPIR32-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[BLOCK_CAPTURED]], align 4 +// SPIR32-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr [[BLOCK]], i32 0, i32 4 +// SPIR32-NEXT: [[TMP3:%.*]] = load i32, ptr [[I_ADDR]], align 4 +// SPIR32-NEXT: store i32 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 4 +// SPIR32-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr [[BLOCK]], i32 0, i32 5 +// SPIR32-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 4 +// SPIR32-NEXT: store ptr addrspace(1) [[TMP4]], ptr [[BLOCK_CAPTURED2]], align 4 +// SPIR32-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[BLOCK]] to ptr addrspace(4) +// SPIR32-NEXT: [[TMP6:%.*]] = call spir_func i32 @__enqueue_kernel_basic(ptr [[TMP0]], i32 [[TMP1]], ptr byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr addrspace(4) addrspacecast (ptr @__device_side_enqueue_block_invoke_kernel to ptr addrspace(4)), ptr addrspace(4) [[TMP5]]) +// SPIR32-NEXT: ret void +// +// +// SPIR32: Function Attrs: convergent noinline nounwind optnone +// SPIR32-LABEL: define {{[^@]+}}@__device_side_enqueue_block_invoke +// SPIR32-SAME: (ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] { +// SPIR32-NEXT: entry: +// SPIR32-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr addrspace(4), align 4 +// SPIR32-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr addrspace(4), align 4 +// SPIR32-NEXT: store ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 4 +// SPIR32-NEXT: store ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 4 +// SPIR32-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 +// SPIR32-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[BLOCK_CAPTURE_ADDR]], align 4 +// SPIR32-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 +// SPIR32-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[BLOCK_CAPTURE_ADDR1]], align 4 +// SPIR32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i32 [[TMP1]] +// SPIR32-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[ARRAYIDX]], align 4 +// SPIR32-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 3 +// SPIR32-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[BLOCK_CAPTURE_ADDR2]], align 4 +// SPIR32-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ i32, i32, ptr addrspace(4), ptr addrspace(1), i32, ptr addrspace(1) }>, ptr addrspace(4) [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4 +// SPIR32-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[BLOCK_CAPTURE_ADDR3]], align 4 +// SPIR32-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 [[TMP4]] +// SPIR32-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARRAYIDX4]], align 4 +// SPIR32-NEXT: ret void +// +// +// SPIR32: Function Attrs: convergent norecurse +// SPIR32-LABEL: define {{[^@]+}}@__device_side_enqueue_block_invoke_kernel +// SPIR32-SAME: (ptr addrspace(4) [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] { +// SPIR32-NEXT: entry: +// SPIR32-NEXT: call spir_func void @__device_side_enqueue_block_invoke(ptr addrspace(4) [[TMP0]]) +// SPIR32-NEXT: ret void +// +//. +// SPIR32: attributes #0 = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +// SPIR32: attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// SPIR32: attributes #2 = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// SPIR32: attributes #3 = { convergent norecurse "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } +//. +// SPIR32: !0 = !{i32 1, !"wchar_size", i32 4} +// SPIR32: !1 = !{i32 2, i32 0} +// SPIR32: !2 = !{i32 1, i32 1, i32 0} +// SPIR32: !3 = !{!"none", !"none", !"none"} +// SPIR32: !4 = !{!"int*", !"int*", !"int"} +// SPIR32: !5 = !{!"", !"", !""} +//. Index: clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl =================================================================== --- clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -411,7 +411,7 @@ // COMMON: ret void // COMMON: } // COMMON: define spir_kernel void [[INVLK2]](i8 addrspace(4)*{{.*}}) -// COMMON: define spir_kernel void [[INVGK1]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) [[INVOKE_ATTR:#[0-9]+]] +// COMMON: define spir_kernel void [[INVGK1]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) [[INVOKE_KERNEL_ATTR:#[0-9]+]] // COMMON: define spir_kernel void [[INVGK2]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) // COMMON: define spir_kernel void [[INVGK3]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) // COMMON: define spir_kernel void [[INVGK4]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) @@ -422,7 +422,7 @@ // COMMON: ret void // COMMON: } // COMMON: define spir_kernel void [[INVGK7]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) -// COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}}) +// COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}}) [[INVG8_INVOKE_FUNC_ATTR:#[0-9]+]] // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}}) // COMMON: define spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}}) // COMMON: define spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) @@ -431,4 +431,5 @@ // COMMON: define spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}}) // COMMON: define spir_kernel void [[INVGK11]](i8 addrspace(4)*{{.*}}) -// COMMON: attributes [[INVOKE_ATTR]] = { convergent nounwind } +// COMMON: attributes [[INVG8_INVOKE_FUNC_ATTR]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// COMMON: attributes [[INVOKE_KERNEL_ATTR]] = { convergent norecurse "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }