Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -15,6 +15,7 @@ #include "CGCleanup.h" #include "CGRecordLayout.h" #include "CodeGenFunction.h" +#include "TargetInfo.h" #include "clang/AST/APValue.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" @@ -6590,6 +6591,8 @@ OutlinedFn->addFnAttr("omp_target_thread_limit", std::to_string(DefaultValThreads)); } + + CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); } /// Checks if the expression is constant or does not have non-trivial function Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -9143,6 +9143,10 @@ public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} + + void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F, + CodeGenModule &CGM) const; + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; @@ -9182,36 +9186,13 @@ cast(D)->getType()->isCUDADeviceBuiltinTextureType())); } -void AMDGPUTargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { - if (requiresAMDGPUProtectedVisibility(D, GV)) { - GV->setVisibility(llvm::GlobalValue::ProtectedVisibility); - GV->setDSOLocal(true); - } - - if (GV->isDeclaration()) - return; - const FunctionDecl *FD = dyn_cast_or_null(D); - if (!FD) - return; - - llvm::Function *F = cast(GV); - - const auto *ReqdWGS = M.getLangOpts().OpenCL ? - FD->getAttr() : nullptr; - - - const bool IsOpenCLKernel = M.getLangOpts().OpenCL && - FD->hasAttr(); - const bool IsHIPKernel = M.getLangOpts().HIP && - FD->hasAttr(); - if ((IsOpenCLKernel || IsHIPKernel) && - (M.getTriple().getOS() == llvm::Triple::AMDHSA)) - F->addFnAttr("amdgpu-implicitarg-num-bytes", "56"); - - if (IsHIPKernel) - F->addFnAttr("uniform-work-group-size", "true"); - +void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( + const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const { + const auto *ReqdWGS = + M.getLangOpts().OpenCL ? FD->getAttr() : nullptr; + const bool IsOpenCLKernel = + M.getLangOpts().OpenCL && FD->hasAttr(); + const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr(); const auto *FlatWGS = FD->getAttr(); if (ReqdWGS || FlatWGS) { @@ -9279,6 +9260,38 @@ if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } +} + +void AMDGPUTargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (requiresAMDGPUProtectedVisibility(D, GV)) { + GV->setVisibility(llvm::GlobalValue::ProtectedVisibility); + GV->setDSOLocal(true); + } + + if (GV->isDeclaration()) + return; + + llvm::Function *F = dyn_cast(GV); + if (!F) + return; + + const FunctionDecl *FD = dyn_cast_or_null(D); + if (FD) + setFunctionDeclAttributes(FD, F, M); + + const bool IsOpenCLKernel = + M.getLangOpts().OpenCL && FD && FD->hasAttr(); + const bool IsHIPKernel = + M.getLangOpts().HIP && FD && FD->hasAttr(); + + const bool IsOpenMP = M.getLangOpts().OpenMP && !FD; + if ((IsOpenCLKernel || IsHIPKernel || IsOpenMP) && + (M.getTriple().getOS() == llvm::Triple::AMDHSA)) + F->addFnAttr("amdgpu-implicitarg-num-bytes", "56"); + + if (IsHIPKernel) + F->addFnAttr("uniform-work-group-size", "true"); if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics()) F->addFnAttr("amdgpu-unsafe-fp-atomics", "true"); Index: clang/test/OpenMP/amdgcn-attributes.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/amdgcn-attributes.cpp @@ -0,0 +1,43 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=DEFAULT,ALL %s +// RUN: %clang_cc1 -target-cpu gfx900 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=CPU,ALL %s + +// RUN: %clang_cc1 -menable-no-nans -mno-amdgpu-ieee -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=NOIEEE,ALL %s +// RUN: %clang_cc1 -munsafe-fp-atomics -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=UNSAFEATOMIC,ALL %s + +// expected-no-diagnostics + +#define N 100 + +int callable(int); + +// Check that the target attributes are set on the generated kernel +int func() { + // ALL-LABEL: amdgpu_kernel void @__omp_offloading{{.*}} #0 + + int arr[N]; + +#pragma omp target + for (int i = 0; i < N; i++) { + arr[i] = callable(arr[i]); + } + + return arr[0]; +} + +int callable(int x) { + // ALL-LABEL: @_Z8callablei(i32 %x) #1 + return x + 1; +} + + // DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + // CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" } + // NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + // UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +// DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" } +// NOIEEE: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// UNSAFEATOMIC: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }