diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1256,6 +1256,12 @@ getTriple().getArch() == llvm::Triple::aarch64); } + /// Tests whether the target is an OpenMP-compatible GPU architecture + /// Currently only supports NVPTX and AMDGCN + static bool isOpenMPGPU(llvm::Triple &T) { + return T.isNVPTX() || T.isAMDGCN(); + } + /// Return true if {|} are normal characters in the asm string. /// /// If this returns false (the default), then {abc|xyz} is syntax diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3214,10 +3214,11 @@ Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) return 0; - // CUDA does not have device-side standard library. printf and malloc are the - // only special cases that are supported by device-side runtime. - if (Context.getLangOpts().CUDA && hasAttr() && - !hasAttr() && + // CUDA/HIP does not have device-side standard library. printf and malloc are + // the only special cases that are supported by device-side runtime. + if (((Context.getLangOpts().CUDA && hasAttr() && + !hasAttr()) || + Context.getTargetInfo().getTriple().isAMDGCN()) && !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc)) return 0; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -22,6 +22,7 @@ #include "clang/Basic/ObjCRuntime.h" #include "clang/Basic/Sanitizers.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" #include "clang/Basic/Version.h" #include "clang/Basic/Visibility.h" @@ -3096,9 +3097,11 @@ } } + bool IsOpenMPGPU = clang::TargetInfo::isOpenMPGPU(T); + // Set the flag to prevent the implementation from emitting device exception // handling code for those requiring so. - if ((Opts.OpenMPIsDevice && T.isNVPTX()) || Opts.OpenCLCPlusPlus) { + if ((Opts.OpenMPIsDevice && IsOpenMPGPU) || Opts.OpenCLCPlusPlus) { Opts.Exceptions = 0; Opts.CXXExceptions = 0; } @@ -3132,6 +3135,7 @@ TT.getArch() == llvm::Triple::ppc64le || TT.getArch() == llvm::Triple::nvptx || TT.getArch() == llvm::Triple::nvptx64 || + TT.getArch() == llvm::Triple::amdgcn || TT.getArch() == llvm::Triple::x86 || TT.getArch() == llvm::Triple::x86_64)) Diags.Report(diag::err_drv_invalid_omp_target) << A->getValue(i); @@ -3149,13 +3153,13 @@ << Opts.OMPHostIRFile; } - // Set CUDA mode for OpenMP target NVPTX if specified in options - Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && T.isNVPTX() && + // Set CUDA mode for OpenMP target NVPTX/AMDGCN if specified in options + Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && IsOpenMPGPU && Args.hasArg(options::OPT_fopenmp_cuda_mode); - // Set CUDA mode for OpenMP target NVPTX if specified in options + // Set CUDA mode for OpenMP target NVPTX/AMDGCN if specified in options Opts.OpenMPCUDAForceFullRuntime = - Opts.OpenMPIsDevice && T.isNVPTX() && + Opts.OpenMPIsDevice && IsOpenMPGPU && Args.hasArg(options::OPT_fopenmp_cuda_force_full_runtime); // Record whether the __DEPRECATED define was requested. diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -6,6 +6,7 @@ // REQUIRES: x86-registered-target // REQUIRES: powerpc-registered-target // REQUIRES: nvptx-registered-target +// REQUIRES: amdgpu-registered-target /// ########################################################################### @@ -249,30 +250,49 @@ // HAS_DEBUG-SAME: "--return-at-end" // HAS_DEBUG: nvlink // HAS_DEBUG-SAME: "-g" +// CUDA_MODE: clang{{.*}}"-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}" +// CUDA_MODE-SAME: "-fopenmp-cuda-mode" +// NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode" // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode 2>&1 \ // RUN: | FileCheck -check-prefix=CUDA_MODE %s // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \ // RUN: | FileCheck -check-prefix=CUDA_MODE %s -// CUDA_MODE: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda" -// CUDA_MODE-SAME: "-fopenmp-cuda-mode" // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode 2>&1 \ // RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \ // RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s -// NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode" + +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=CUDA_MODE %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=CUDA_MODE %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s + +// FULL_RUNTIME: clang{{.*}}"-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}" +// FULL_RUNTIME-SAME: "-fopenmp-cuda-force-full-runtime" +// NO_FULL_RUNTIME-NOT: "-{{fno-|f}}openmp-cuda-force-full-runtime" // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime 2>&1 \ // RUN: | FileCheck -check-prefix=FULL_RUNTIME %s // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \ // RUN: | FileCheck -check-prefix=FULL_RUNTIME %s -// FULL_RUNTIME: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda" -// FULL_RUNTIME-SAME: "-fopenmp-cuda-force-full-runtime" // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime 2>&1 \ // RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \ // RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s -// NO_FULL_RUNTIME-NOT: "-{{fno-|f}}openmp-cuda-force-full-runtime" + +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime 2>&1 \ +// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \ +// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s // RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \ // RUN: | FileCheck -check-prefix=CUDA_RED_RECS %s diff --git a/clang/test/OpenMP/target_parallel_no_exceptions.cpp b/clang/test/OpenMP/target_parallel_no_exceptions.cpp --- a/clang/test/OpenMP/target_parallel_no_exceptions.cpp +++ b/clang/test/OpenMP/target_parallel_no_exceptions.cpp @@ -1,6 +1,7 @@ /// Make sure no exception messages are inclided in the llvm output. // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHK-EXCEPTION +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHK-EXCEPTION void test_increment() { #pragma omp target diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -692,6 +692,9 @@ return getArch() == Triple::nvptx || getArch() == Triple::nvptx64; } + /// Tests whether the target is AMDGCN + bool isAMDGCN() const { return getArch() == Triple::amdgcn; } + bool isAMDGPU() const { return getArch() == Triple::r600 || getArch() == Triple::amdgcn; }