Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
@@ -268,18 +268,22 @@
 
   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
                                       AMDGPUASI.GLOBAL_ADDRESS);
-  auto CallsPrintf = Func.getParent()->getNamedMetadata("llvm.printf.fmts");
-  if (CallsPrintf)
+
+  // Emit "printf buffer" argument if printf is used, otherwise emit dummy
+  // "none" argument.
+  if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
     emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
+  else
+    emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+
+  // Emit "default queue" and "completion action" arguments if enqueue kernel is
+  // used, otherwise emit dummy "none" arguments.
   if (Func.hasFnAttribute("calls-enqueue-kernel")) {
-    if (!CallsPrintf) {
-      // Emit a dummy argument so that the remaining hidden arguments
-      // have a fixed position relative to the first hidden argument.
-      // This is to facilitate library code to access hidden arguments.
-      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
-    }
     emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
     emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
+  } else {
+    emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+    emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
   }
 }
 
Index: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll
@@ -30,7 +30,6 @@
 ; CHECK-NEXT: Align: 8
 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
 ; CHECK-NEXT: ValueType: I64
-; CHECK-NOT: ValueKind: HiddenNone
 ; CHECK-NOT: ValueKind: HiddenDefaultQueue
 ; CHECK-NOT: ValueKind: HiddenCompletionAction
 define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a)
Index: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+
+; CHECK: ---
+; CHECK: Version: [ 1, 0 ]
+; CHECK: Kernels:
+
+; CHECK: - Name: test
+; CHECK: SymbolName: 'test@kd'
+; CHECK: Args:
+; CHECK-NEXT: - Name: r
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: a
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: b
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenNone
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenNone
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenNone
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+define amdgpu_kernel void @test(
+    half addrspace(1)* %r,
+    half addrspace(1)* %a,
+    half addrspace(1)* %b) {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %b.val = load half, half addrspace(1)* %b
+  %r.val = fadd half %a.val, %b.val
+  store half %r.val, half addrspace(1)* %r
+  ret void
+}
+
+!opencl.ocl.version = !{!0}
+!0 = !{i32 2, i32 0}