diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4214,17 +4214,20 @@ /// Returns the canonical name for the offloading architecture when using HIP or /// CUDA. static StringRef getCanonicalArchString(Compilation &C, - llvm::opt::DerivedArgList &Args, + const llvm::opt::DerivedArgList &Args, StringRef ArchStr, - Action::OffloadKind Kind) { - if (Kind == Action::OFK_Cuda) { + Action::OffloadKind Kind, + const ToolChain *TC) { + if (Kind == Action::OFK_Cuda || + (Kind == Action::OFK_OpenMP && TC->getTriple().isNVPTX())) { CudaArch Arch = StringToCudaArch(ArchStr); if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) { C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; return StringRef(); } return Args.MakeArgStringRef(CudaArchToString(Arch)); - } else if (Kind == Action::OFK_HIP) { + } else if (Kind == Action::OFK_HIP || + (Kind == Action::OFK_OpenMP && TC->getTriple().isAMDGPU())) { llvm::StringMap Features; // getHIPOffloadTargetTriple() is known to return valid value as it has // been called successfully in the CreateOffloadingDeviceToolChains(). @@ -4239,7 +4242,8 @@ return Args.MakeArgStringRef( getCanonicalTargetID(Arch.getValue(), Features)); } - return StringRef(); + // If the input isn't CUDA or HIP just return the architecture. + return ArchStr; } /// Checks if the set offloading architectures does not conflict. Returns the @@ -4259,12 +4263,8 @@ /// This function returns a set of bound architectures, if there are no bound /// architctures we return a set containing only the empty string. static llvm::DenseSet -getOffloadArchs(Compilation &C, llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind) { - - // If this is OpenMP offloading we don't use a bound architecture. - if (Kind == Action::OFK_OpenMP) - return llvm::DenseSet{StringRef()}; +getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, + Action::OffloadKind Kind, const ToolChain *TC) { // --offload and --offload-arch options are mutually exclusive. if (Args.hasArgNoClaim(options::OPT_offload_EQ) && @@ -4280,12 +4280,12 @@ llvm::DenseSet Archs; for (auto &Arg : Args) { if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { - Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind)); + Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC)); } else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) { if (Arg->getValue() == StringRef("all")) Archs.clear(); else - Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind)); + Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC)); } } @@ -4301,6 +4301,11 @@ Archs.insert(CudaArchToString(CudaArch::CudaDefault)); else if (Kind == Action::OFK_HIP) Archs.insert(CudaArchToString(CudaArch::HIPDefault)); + else if (Kind == Action::OFK_OpenMP) + Archs.insert(StringRef()); + } else { + Args.ClaimAllArgs(options::OPT_offload_arch_EQ); + Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ); } return Archs; @@ -4346,7 +4351,8 @@ // Get the product of all bound architectures and toolchains. SmallVector> TCAndArchs; for (const ToolChain *TC : ToolChains) - for (StringRef Arch : getOffloadArchs(C, Args, Kind)) + for (StringRef Arch : getOffloadArchs( + C, C.getArgsForToolChain(TC, "generic", Kind), Kind, TC)) TCAndArchs.push_back(std::make_pair(TC, Arch)); for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I) @@ -4375,9 +4381,9 @@ HostAction->setCannotBeCollapsedWithNextDependentAction(); OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), - /*BoundArch=*/nullptr, Kind); + TCAndArch->second.data(), Kind); OffloadAction::DeviceDependences DDep; - DDep.add(*A, *TCAndArch->first, /*BoundArch=*/nullptr, Kind); + DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); A = C.MakeAction(HDep, DDep); } else if (isa(A) && Kind == Action::OFK_Cuda) { // The Cuda toolchain uses fatbinary as the linker phase to bundle the diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -307,9 +307,10 @@ if (!llvm::is_contained(*DAL, A)) DAL->append(A); - std::string Arch = DAL->getLastArgValue(options::OPT_march_EQ).str(); - if (Arch.empty()) { - checkSystemForAMDGPU(Args, *this, Arch); + if (!DAL->hasArg(options::OPT_march_EQ)) { + std::string Arch = BoundArch.str(); + if (BoundArch.empty()) + checkSystemForAMDGPU(Args, *this, Arch); DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch); } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -847,10 +847,10 @@ if (!llvm::is_contained(*DAL, A)) DAL->append(A); - StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ); - if (Arch.empty()) + if (!DAL->hasArg(options::OPT_march_EQ)) DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), - CLANG_OPENMP_NVPTX_DEFAULT_ARCH); + !BoundArch.empty() ? BoundArch + : CLANG_OPENMP_NVPTX_DEFAULT_ARCH); return DAL; } diff --git a/clang/test/Driver/amdgpu-openmp-toolchain-new.c b/clang/test/Driver/amdgpu-openmp-toolchain-new.c --- a/clang/test/Driver/amdgpu-openmp-toolchain-new.c +++ b/clang/test/Driver/amdgpu-openmp-toolchain-new.c @@ -3,6 +3,9 @@ // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ // RUN: -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \ // RUN: | FileCheck %s +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ +// RUN: --offload-arch=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \ +// RUN: | FileCheck %s // verify the tools invocations // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c" @@ -34,6 +37,7 @@ // CHECK-NOGPULIB-NOT: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgpu-gfx803.bc"{{.*}} // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]" // CHECK-BINDINGS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.*]]" // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_BC]]"], output: "[[HOST_OBJ:.*]]" diff --git a/clang/test/Driver/openmp-offload-gpu-new.c b/clang/test/Driver/openmp-offload-gpu-new.c --- a/clang/test/Driver/openmp-offload-gpu-new.c +++ b/clang/test/Driver/openmp-offload-gpu-new.c @@ -10,6 +10,10 @@ // RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \ // RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \ // RUN: | FileCheck %s +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: --offload-arch=sm_52 \ +// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \ +// RUN: | FileCheck %s // verify the tools invocations // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c" @@ -40,6 +44,27 @@ // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ]]"], output: "[[HOST_OBJ:.*]]" // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]" +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[HOST_OBJ:.*]]" +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908 \ +// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NVIDIA-AMDGPU + +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]" +// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]" +// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]" +// CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]" +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[NVIDIA_CUBIN]]", "[[AMD_BC]]"], output: "[[HOST_OBJ:.+]]" +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" + // RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR // CHECK-EMIT-LLVM-IR: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm"